merge3 0.8 → 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/merge3.rb.~1.2.~ +765 -0
- metadata +8 -4
@@ -0,0 +1,765 @@
+#
+# merge3.rb - a 3 way text merging tool
+#
+# Copyright 2004 Helsinki Institute for Information Technology (HIIT)
+# and the authors. All rights reserved.
+#
+# Authors: Torsten Rueger <torsten@lightning.nu>
+#
+
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# start       : last week it was really nice weather
+# one (+end)  : last weekend it was really nice weather
+# two (order) : it was really nice weather last week
+#
+# merged      : it was really nice weather last weekend
+
+# It's a simple example, but getting that merged result is not.
+
+# It seems the standard diff/patch does not use move or copy as a command.
+# As a consequence, simple avoidable conflicts are produced as soon as
+# move + edit touch the same data.
+
+# This is not only surprising as diff/patch have been around for so long;
+# also the algorithms for detecting moves have been around for long:
+# basically there are the two approaches of using add/delete
+# or add/copy (or just copy). Diff/patch just use the first, whereas the second
+# would allow better patching.
+
+# Now the really interesting thing is that modern diff algorithms I know
+# (xdelta + vcdiff) use copy. As these algorithms are used to work on
+# 2 versions, A and B, producing a diff ab that then may be applied to A and
+# A only, the algorithms use copy not because it's better for 3-way merging,
+# but because it's more efficient.
+
+# This algorithm is a 3 way merge on text files.
+# It turns out that it merges xml just fine though.
+
+# A very rough sketch is:
+# -- find the common parts of two files. We do this by a hashing technique;
+#    once we have the common (copied) parts we add the added/deleted ones
+# -- common parts are sorted according to both original and edited file order
+# -- merge by going through the list of parts. Start at one file and always
+#    swap to the other when there is a change in the other, a change from
+#    the original file's order
+
+# Note: While this does not concentrate on performance, it is quite reasonable,
+# meaning linear in file size and quadratic in number of edits.
+# Performance may be tested with the speedy.rb file in the test directory
+#
+
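# The weather example above maps directly onto the public entry point defined
# below. A minimal sketch of driving it from Ruby (the require name is an
# assumption based on the gem installing lib/merge3.rb):
require 'merge3'

start = "last week it was really nice weather"
one   = "last weekend it was really nice weather"  # edit near the end
two   = "it was really nice weather last week"     # text moved around
puts Merge3::three_way( start , one , two )
# per the comment above, the expected merged result is
# "it was really nice weather last weekend"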
+DEBUG = false # toggle this to true to get a complete trace of what happens
+
+
+# So here comes the main algorithm. We have the class to carry the original file
+# and its hash codes we need for the matching of the files
+
+class Merge3
+
+  # make a three way merge, passing in strings and returning a string
+  # strings are byte arrays (not line arrays as in diff)
+  def Merge3::three_way( start_file , one_file , two_file , ignore_white_space = false )
+    # We also create chunks of the whole files, in case whitespace
+    # handling is on. Chunk::subchunk passes compressed whitespace on
+    start_chunk = Chunk.new( 0 , start_file.length , start_file )
+    one_chunk = Chunk.new( 0 , one_file.length , one_file )
+    two_chunk = Chunk.new( 0 , two_file.length , two_file )
+    if ignore_white_space
+      start_chunk.squeeze # all consecutive whitespace is then squeezed
+      one_chunk.squeeze   # into one, and a table created to unsqueeze later
+      two_chunk.squeeze   # Yes, this is expensive
+    end
+    # only create the matches hash once for the start file
+    start_matches = Merge3::all_matches(start_chunk)
+    # Create the chunks (moves first, then added and deleted)
+    one_seq = Sequence.new( one_chunk , start_chunk , start_matches )
+    two_seq = Sequence.new( two_chunk , start_chunk , start_matches )
+    puts "Diff original 0" , one_seq if DEBUG
+    puts "Diff original 1" , two_seq if DEBUG
+    # break all chunks into non overlapping regions (in the original file order)
+    one_seq.no_overlap( two_seq )
+    two_seq.no_overlap( one_seq )
+    puts "Diff non-overlapping 0" , one_seq if DEBUG
+    puts "Diff non-overlapping 1" , two_seq if DEBUG
+    # and finally (and most simply) do the merge
+    Merge3::merge( one_seq , two_seq )
+  end
+
+  # given the Sequences of both files, attempt merging them.
+  # the chunks in the sequences are non-overlapping, so we can always work
+  # on whole chunks.
+  # In a sentence, the algorithm is now: Follow the line of change
+  def Merge3::merge( one_seq , two_seq )
+    # we go along the original file's order and change to the other sequence
+    # if there has been a change compared to the original file.
+    out = [] # this is the array of strings to be output, the result
+    # this doesn't consider changes at the beginning of the file,
+    # which just goes to prove that we're researchers
+    one_chunk = one_seq.chunks.first
+    two_chunk = two_seq.chunks.first
+    last_choosen = one_chunk
+    break_flag = nil
+    while break_flag.nil?
+      if DEBUG and one_chunk.from != two_chunk.from
+        # chunks are non overlapping, so they must be the same
+        raise "CHUNKS differ #{one_chunk}\n#{two_chunk}"
+      end
+      # we start with the next in the edited file order and then check where
+      next_one = one_seq.get_next( one_chunk ) # the change is
+      next_two = two_seq.get_next( two_chunk )
+      break_flag = true if next_one.nil? or next_two.nil? # eof
+      # so if the change is in the first file,
+      if break_flag.nil? and one_chunk.org_stop != next_one.from
+        raise "conflict at #{one_chunk.org_stop}" if two_chunk.org_stop != next_two.from
+        # we find the one in file two that starts where it (one) ends
+        next_two = two_seq.find_from( next_one.from )
+        puts "ONE #{one_chunk}\n #{two_chunk} nNEXT #{next_one}\n #{next_two}" if DEBUG
+        next_choosen = next_one
+      # otherwise the change must be in file two
+      elsif break_flag.nil?
+        # and we look for the chunk in one, that starts where two ends
+        next_one = one_seq.find_from( next_two.from )
+        puts "TWO #{two_chunk}\n #{one_chunk}\nNEXT #{next_two}\n #{next_one}" if DEBUG
+        next_choosen = next_two
+      else
+        next_one , next_two = nil , nil
+      end
+      this_del = ( one_chunk.kind_of?(Deleted) or two_chunk.kind_of?(Deleted) )
+      # Output the associated (real == whitespace expanded) string if not deleted
+      out << last_choosen.real_string unless this_del
+      puts "OUT #{Chunk::short_string(one_chunk.real_string)}" if DEBUG and not this_del
+      # we ignore adds (from one file) that happen to fall in the middle of a deleted in the other
+      one_del = ( one_chunk.kind_of?( Deleted) and next_one.kind_of?( Deleted) )
+      two_del = ( two_chunk.kind_of?( Deleted) and next_two.kind_of?( Deleted) )
+      unless one_del or two_del
+        # otherwise we output added text from both files
+        puts "ADD one #{one_chunk.added}" if DEBUG and not one_chunk.added.nil?
+        out << one_chunk.added.real_string unless one_chunk.added.nil?
+        # and if both have added text create an asymmetric result, hmm
+        puts "ADD two #{ two_chunk.added}" if DEBUG and not two_chunk.added.nil?
+        out << two_chunk.added.real_string unless two_chunk.added.nil?
+      end
+      one_chunk = next_one
+      two_chunk = next_two
+      last_choosen = next_choosen
+    end
+    puts "RESULT" , out.join , "END" if DEBUG
+    return out.join # joins the pieces and returns the merged string
+  end
+
+  # build a hash of chunks (hash of the chunk's string against the chunk).
+  # This is the backbone of the matching algorithm, so that finding a match
+  # is basically a hash lookup.
+  # Matches may be of different sizes; we use 16, 32, 48 and -1 for line matching
+  def Merge3::matches( chunk , size )
+    if size == -1 then
+      return Merge3::match_newlines( chunk )
+    else
+      return Merge3::match( chunk , size )
+    end
+  end
+
+  # collect all matches (for the start file, so it doesn't have to be done twice)
+  def Merge3::all_matches( chunk )
+    # order counts, so large chunks overwrite small ones on hash collisions
+    matches = Merge3::match(chunk,16)
+    matches.update(match(chunk,32))
+    matches.update(match(chunk,48))
+    matches.update(match_newlines(chunk))
+    matches
+  end
+
+  # find matches according to newline separation.
+  def Merge3::match_newlines( chunk )
+    match = {} # the hash of matches, using the strings as key, chunk as value
+    index = 0  # count through the string length
+    if chunk.whitespace.empty? # whitespace compression on or not ?
+      # because there are only newlines if whitespace compression is off
+      chunk.str.split("\n").each do |line|
+        # this logic strips the whitespace off both sides of the line, in an
+        # effort to match things like changed indentation
+        offset = 0
+        offset += 1 while line[offset,1] =~ /\s/
+        index += offset
+        line.strip!
+        cop = chunk.subchunk( index , line.length , index )
+        match[cop.str] = cop # and store in our hash (no collision detection)
+        #puts "LINE #{index} #{line.length} #{offset}--#{cop.str}--"
+        index += line.length + 1 # 1 is the newline
+      end
+    else # whitespace compression is on, so the whitespace table contains
+         # the newlines, we just have to find them
+      chunk.whitespace.each do | at , str |
+        #puts "###{str}##"
+        next unless str.include?( "\n" )
+        cop = chunk.subchunk( index + 1 , at - index , index )
+        index = at
+        #puts "###{cop.str}##" if cop.str.include?("Four")
+        match[cop.str] = cop # and store in our hash (no collision detection)
+      end
+    end
+    match
+  end
+
+  # find matches of around the given length. Exactly the length for binary files,
+  # but if there is whitespace, we use that to break within 70% of the given
+  # value
+  def Merge3::match( chunk , around )
+    match = {} # the hash of matches, using the strings as key, chunks as value
+    index = 0  # count through the string length
+    while index < chunk.length # also possible here to do half from the
+      left = chunk.length - index # front and half from the back (?)
+      if left > 0.7*around and left <= 1.5*around # are we at the end
+        len = chunk.length - index
+      else
+        sub = chunk.str[index , 1.5*around] # take the maximum length we want
+        if (white = sub.index( /\W/ , 0.7 * around )).nil? # and check for non char
+          len = around # either taking the whole
+        else # or the "words" we found
+          len = white # hoping this is better for text, so we have no alignment
+        end # issues
+      end # then create a chunk with the indexes and string
+      cop = chunk.subchunk( index , len , index )
+      match[cop.str] = cop # and store in our hash (no collision detection)
+      index += len
+    end
+    # now we only take one from the back, because that's often a match and will be
+    at = (chunk.length - 0.7*around).to_i
+    cop = chunk.subchunk( at , (0.7*around).to_i , at )
+    match[cop.str] = cop
+    match
+  end
+end
+
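# To make the "matching is a hash lookup" idea above concrete, here is a rough,
# self-contained sketch using only the classes defined in this file (the sample
# text is made up; with whitespace handling off, all_matches fills the hash with
# the 16/32/48-byte pieces plus the stripped lines of the start text):
s = "the quick brown fox\njumps over the lazy dog\n"
start_chunk = Chunk.new( 0 , s.length , s )
start_matches = Merge3::all_matches( start_chunk )
# start_matches.keys now holds the candidate substrings; Sequence#initialize
# looks up each edited file's pieces in exactly this hash to find the copied
# (possibly moved) regions before recording adds and deletes.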
+# A Sequence represents a sequence of chunks, in other words the modified file
+# broken into the chunks that were found
+
+# the sequence keeps the chunks as two lists, one in edited file order
+# and one in original file order
+class Sequence
+  attr :chunks     # the list of chunks, sorted in order of the edited file
+  attr :org_chunks # the list of chunks, sorted in order of the original file
+
+  # output both lists of chunks, edited and original, with chunks and numbers
+  # but strings only up to 70 chars (for readability)
+  def to_s
+    ret = ""
+    @chunks.each_with_index do | chunk , index |
+      ret += index.to_s + " - " + chunk.to_s + "\n"
+    end
+    @org_chunks.each_with_index do | chunk , index |
+      ret += index.to_s + " - " + chunk.to_s + "\n"
+    end
+    return ret
+  end
+
+  # find the difference from too (file, or byte array) to start
+  # (start_matches are precalculated matches for start).
+  # Collect the differences as chunks (copies or adds) and mark deletes.
+  # The algorithm is "greedy", meaning if it finds a match (using the
+  # matches) it tries to extend it in both directions
+  def initialize too_chunk , start_chunk , start_matches
+    @chunks = []
+    @org_chunks = []
+    # run by length to get big (sure) hits first (-1 == newlines)
+    [ -1 , 48 , 32, 16 ].each do |c_size|
+      two_matches = Merge3::matches(too_chunk , c_size).each do |key , two|
+        next if (start = start_matches[two.str]).nil?
+        raise "Keys collide :#{start}" if start.str != two.str
+        next if done?( two , start )
+        # here comes the greedy part, first left then right
+        two_start , start_start , len = two.start , start.start , two.length
+        puts two , "MINUS" if len <= 0 and DEBUG
+        start_rim , edit_rim = find_left_rim( start_start , two_start )
+        while ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) and
+              ( edit_rim < two_start ) and ( start_rim < start_start )
+          two_start -= 1
+          start_start -= 1
+          len += 1 # to keep the right end where it was
+        end
+        start_rim , edit_rim = find_right_rim( start_start + len , two_start + len , start_chunk.length , too_chunk.length )
+        while ( start_chunk.str[start_start + len ] == too_chunk.str[two_start + len ] ) and
+              ((len + start_start) < start_rim) and ((two_start + len) < edit_rim)
+          len += 1
+        end
+        chunk = too_chunk.subchunk(two_start , len , start_start )
+        puts "Matched #{chunk}" if DEBUG
+        add_chunk( chunk )
+      end
+    end
+    # now find the parts that were deleted
+    # (gaps in the matching of the original file)
+    each_org_pair do | org , next_org |
+      if org.org_stop < next_org.from
+        del_str = start_chunk.str[org.org_stop , next_org.from - org.org_stop ]
+        puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
+        del = Deleted.new( org.stop , org.org_stop , del_str)
+        add_chunk( del )
+      end
+    end
+    # now find the parts that were added (gaps in the matching of the edited file)
+    adds = {}
+    each_pair do |chunk , next_chunk |
+      if chunk.stop < next_chunk.start
+        add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 )
+        puts "ADDING #{add} " if DEBUG
+        chunk.added = add # hang the add onto the chunk
+        # this following logic has fixed some cases where the matching had
+        # been somewhat unintuitive. Though correct, it produced
+        # unnecessary conflicts in conjunction with deletes
+        while add.str[0] == next_chunk.str[0] and
+              add.str[0] != 32 and # not spaces, avoid whitespace headaches
+              chunk.org_stop == next_chunk.from
+          puts "before: #{chunk} \nNext:#{next_chunk}" if DEBUG
+          add.rotate # put the first to the end
+          remove_chunk( next_chunk )
+          # move the first from the next to the end of the last
+          chunk.push( next_chunk.pop )
+          puts "after: #{chunk} \nNext:#{next_chunk}" if DEBUG
+          add_chunk( next_chunk )
+        end unless chunk.kind_of?(Deleted) or next_chunk.kind_of?(Deleted)
+      end
+    end
+  end
+
+  # helper to iterate over each pair in the edited file order
+  def each_pair
+    return if @chunks.length < 2
+    idx = 1
+    chunk = @chunks[0]
+    while idx < @chunks.length
+      next_chunk = @chunks[idx]
+      yield(chunk , next_chunk)
+      chunk = next_chunk
+      idx = idx + 1
+    end
+  end
+
+  # helper to iterate over each pair in the original file order
+  def each_org_pair
+    return if @org_chunks.length < 2
+    idx = 1
+    chunk = @org_chunks[0]
+    while idx < @org_chunks.length
+      next_chunk = @org_chunks[idx]
+      yield(chunk , next_chunk)
+      chunk = next_chunk
+      idx = idx + 1
+    end
+  end
+
+  # get the next chunk in edited file order
+  def get_next( prev )
+    idx = @chunks.index prev
+    return nil unless idx
+    return nil if idx == @chunks.length - 1
+    nekst = @chunks[idx + 1 ]
+    return nekst
+  end
+
+  # add this chunk. This implementation keeps the order of both
+  # arrays correct all the time
+  def add_chunk( chunk )
+    raise "Nil added " if not chunk
+    raise "False added " if chunk == false
+    @chunks.push( chunk )
+    @chunks.sort! { |a , b |
+      ret = a.start <=> b.start
+      # so this adds the Deleted chunks into the edited file too
+      if ret == 0
+        ret = -1 if a.kind_of? Deleted
+        ret = 1 if b.kind_of? Deleted
+      end
+      ret
+    }
+    @org_chunks.push(chunk)
+    @org_chunks.sort! do |a , b | a.from <=> b.from end
+  end
+
+  def remove_chunk( chunk ) # for rotation, will be re-added
+    @chunks.delete(chunk)
+    @org_chunks.delete(chunk)
+  end
+
+  # find the chunk that starts at the given position in the original file
+  def find_from pos
+    each_org do | chunk |
+      #puts "FOUND for #{pos} #{chunk}"
+      return chunk if chunk.from == pos
+    end
+    raise "Not found #{pos} in\n#{self}"
+    # hmmm this was here for some reason I don't recall
+    # return chunk.next if chunk.added and chunk.added.start == pos
+  end
+
+  # find the chunk left of these positions (original and edited) and return
+  # the maximum bound a new chunk can expand to in both coordinates.
+  def find_left_rim org , pos
+    start_rim , two_rim = 0 , 0
+    each_org do | two |
+      start_rim = two.org_stop if start_rim <= two.org_stop and two.org_stop <= org
+    end
+    each do | one |
+      two_rim = one.stop if two_rim <= one.stop and one.stop <= pos
+    end
+    #puts "Left rim #{org} #{pos} is #{start_rim} #{two_rim}" if DEBUG
+    return start_rim , two_rim
+  end
+
+  # find the chunk right of these positions (original and edited) and return
+  # the maximum bound a new chunk can expand to in both coordinates.
+  def find_right_rim start , two , start_start , two_start
+    start_rim , two_rim = start_start , two_start
+    each_org do | tw |
+      start_rim = tw.from if start <= tw.from and tw.from <= start_rim
+    end
+    each do | one |
+      two_rim = one.start if two <= one.start and one.start <= two_rim
+    end
+    #puts "\Right rim #{start} #{two} is #{start_rim} #{two_rim}" if DEBUG
+    return start_rim , two_rim
+  end
+
+  # that part is done if the area (start-stop) is already included in the
+  # patches (first if) or the org part overlaps any of the patches' orgs;
+  # the second case is then a copy
+  def done? two , org
+    #puts "checking done? \n #{two} \n #{org} "
+    each do |chunk|
+      return true if chunk.overlaps? two
+    end
+    each_org do |chunk|
+      return true if chunk.org_overlaps? org
+    end
+    #puts "NOT DONE? : \n #{self}"
+    false
+  end
+
+  # yields all chunks in the sequence in edited file order
+  def each
+    @chunks.each do |chunk|
+      yield chunk if chunk
+    end
+  end
+
+  # yields all chunks in original file order
+  def each_org
+    @org_chunks.each do |chunk|
+      yield chunk if chunk
+    end
+  end
+
+  # change all chunks in this sequence so that they don't have overlaps with
+  # any of the chunks in the given sequence. Done both ways this results in
+  # non-overlapping sequences
+  def no_overlap sequence
+    each_org do |one|
+      sequence.each_org do |two|
+        break_at( one , two.from ) if one.org_includes?( two.from )
+        break_at( one , two.org_stop ) if one.org_includes?( two.org_stop )
+      end
+    end
+  end
+
+  # split in two at pos (thus assuming pos is included) and link the
+  def break_at( chunk , pos )
+    return if pos == chunk.from or pos == chunk.org_stop
+    old_s = chunk.to_s
+    new_chunk = chunk.split_at_org!(pos)
+    if not new_chunk.kind_of? Deleted
+      new_chunk.added = chunk.added
+      chunk.added = nil
+    end
+    #puts "SPLIT #{pos}\nBEFORE #{old_s}\nOLD:#{chunk} \nNEW:#{new_chunk} \n" if DEBUG
+    idx = @chunks.index(chunk)
+    @chunks[idx+1,0] = new_chunk
+    idx = @org_chunks.index(chunk)
+    @org_chunks[idx+1,0] = new_chunk
+  end
+
+end
+
+# This is a little holder class (struct?) that represents a piece of text in
+# a file.
+
+class Chunk
+  # the byte index into the file where the string is (in the edited file)
+  attr_reader :start
+  # length of the string (somewhat redundant with the string below)
+  attr_reader :length
+  # the actual string. This is just here for convenience, it's in the file
+  attr_reader :str
+  # the index into the original file where the string starts, -1 for adds
+  attr_reader :from
+  attr :added , true      # a chunk that was added (if there is such a one)
+  attr :whitespace , true # the table for whitespace
+
+  # cut the chunk to max 71 characters for readable output
+  def Chunk::short_string( s )
+    if s.length < 71
+      return "--" + s.to_s + "--"
+    else
+      return "--" + s[0,20] + "...Cut #{s.length-40} chars..." + s[s.length - 20 , 20] + "--"
+    end
+  end
+
+  def initialize( start , length , string , from = -1)
+    raise "Error nil string passed start=#{start} length=#{length} str=-#{str}=" if string.nil?
+    @start , @length , @str , @from = start , length , string , from
+    @whitespace = []
+  end
+
+  # outputs all attributes and a readable version of the string
+  # also whitespace if present
+  def to_s # for debug output
+    st = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} "
+    st += Chunk::short_string(real_string)
+    st += " \n added=#{added} " unless added.nil?
+    st += " \n whitespace=#{@whitespace.length} " unless @whitespace.empty?
+    ws_count = 0
+    @whitespace.each do | start , s |
+      st += "-" + start.to_s + " " + s.length.to_s + " "
+      st += "n " if s.include?( "\n" )
+      ws_count += 1
+      break if ws_count > 5
+    end
+    st
+  end
+
+  # put the first character at the end
+  def rotate
+    @str = @str[1..-1] + @str[0,1]
+    @start += 1
+  end
+
+  # puts the char at the end of the string, adjusting the length
+  # add the whitespace if it's not nil
+  def push( arg )
+    char , space = arg
+    @str << char
+    @length += 1
+    @whitespace << space unless space.nil?
+  end
+
+  # return the first char, and remove it from the string, adjusting length + start
+  # also return any whitespace entry, if there is such a thing
+  def pop
+    white = nil
+    if (not @whitespace.empty?) and (@whitespace.first[0] == @start )
+      white = @whitespace.delete_at(0)
+      puts "White --#{white}--#{@start}"
+    end
+    char = @str[0]
+    @str = @str[1..-1]
+    @length -= 1
+    @start += 1
+    @from += 1 if @from != -1
+    return [ char , white ]
+  end
+
+  # squeeze the whitespace, i.e. for all sequences of whitespace (space, tab, newline),
+  # and all non-space whitespaces, replace it with a single space and record
+  # the change in the array
+  def squeeze
+    old = @str.dup
+    @whitespace = []
+    at = 0
+    while ( index = @str.index(/\s+/ , at ) )
+      at = index + $&.length
+      if $& != ' ' # do nothing for single spaces
+        @whitespace.push [ index , $& ]
+        @str[index , $&.length] = ' '
+      end
+    end
+    @length = @str.length
+    throw old if DEBUG and old != real_string
+  end
+
+  # unsqueeze whitespace if present
+  def real_string
+    return @str if @whitespace.empty?
+    stri = @str.dup
+    #puts "--" + str + "--" + @str.length.to_s
+    begin
+      @whitespace.reverse.each do | index , st |
+        stri[index - @start , 1] = st
+      end
+    rescue
+      st = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} " + @str
+      st += " \n whitespace=#{@whitespace.length} "
+      @whitespace.each do | start , s |
+        st += "-" + start.to_s + " " + s.length.to_s + " "
+        st += "n " if s.include?( "\n" )
+      end
+      puts st
+      raise
+    end
+    stri
+  end
+
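# A small round-trip sketch of the squeeze / real_string pair above (the sample
# string is made up): squeeze collapses every whitespace run that is not a
# single space and records it, real_string puts the original runs back.
text = "one  two\tthree\nfour"
c = Chunk.new( 0 , text.length , text.dup ) # dup, because squeeze edits @str in place
c.squeeze
c.str          # => "one two three four"
c.real_string  # => "one  two\tthree\nfour", the original text again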
+  # get a substring from the chunk and carry the whitespace table across
+  # start is in the file's coordinates (not the chunk's string coordinates)
+  def subchunk( startt , len , fromm )
+    stri = @str[ startt - @start , len ]
+    table = []
+    @whitespace.each do | arr |
+      table << arr if arr[0] >= startt and arr[0] < ( startt + len )
+    end
+    chunk = Chunk.new( startt , len , stri , fromm )
+    chunk.whitespace = table
+    #puts "SUB at #{startt} #{len} #{chunk}" if fromm == 62
+    chunk
+  end
+
+  # index of where the string ends.
+  # this should be called end, but that's a keyword.
+  def stop
+    @start + @length
+  end
+
+  # index of where the string ends in the original file (nonsense for adds)
+  def org_stop
+    @from + @length
+  end
+
+  def add? # adds are not copied, hence have no from attribute.
+    @from == -1 # a copy carries a positive number as offset into the original
+  end
+
+  # is the point (char index) in the original copied string
+  def org_includes? point
+    ( @from != -1 ) and ( @from <= point ) and ( point < org_stop )
+  end
+
+  def includes_copy? copy # is the copy chunk fully inside this
+    (copy.start >= @start) and (copy.stop <= stop)
+  end
+
+  # return a substring in the original file's coordinates
+  def rstring( fro , to )
+    to = org_stop if to == -1
+    throw "error #{fro} #{to} #{self}" if fro < @from or to > org_stop
+    @length -= to - fro
+    @str[ fro - @from , to - @from ]
+  end
+
+  # does some part of two overlap. always takes me 5 minutes to get
+  def overlaps? two # but a drawing helps
+    start > two.start ? start < two.stop : stop > two.start
+  end
+
+  # does any part of the original overlap (from - org_stop)
+  def org_overlaps? two
+    from > two.from ? from < two.org_stop : org_stop > two.from
+  end
+
+  # this joins two chunks, which are presumed to be adds (so no fiddling with from)
+  # self is changed and the argument untouched (to be deleted)
+  def join_adds add
+    @str += add.str
+    @length = @str.length
+  end
+
+  # split the chunk into two at the given position,
+  # change this and return the right one
+  def split_at_org! pos
+    #puts "SPLIT #{pos} #{self}"
+    throw "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
+    left_length = pos - @from
+    right_length = org_stop - pos
+    right = Chunk.new( @start + left_length , right_length , @str[left_length, right_length] , pos )
+    right.whitespace = @whitespace.dup.delete_if do | index , s |
+      index < right.start
+    end
+    #puts " split #{right} "
+    throw "Right split error #{self} , #{right} :#{pos}" if right.length != right_length
+    @str = @str[0, left_length]
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if @str.nil?
+    @length = @str.length
+    @whitespace.delete_if do | index , s |
+      index >= right.start
+    end
+    #puts " white #{@whitespace} "
+    #puts " left #{self} "
+    throw "Left split error #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
+    return right
+  end
+
+end
+
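# A quick illustration of the two coordinate systems a Chunk carries (the
# numbers are made up): start/stop index into the edited file, from/org_stop
# index into the original file the text was copied from.
c = Chunk.new( 10 , 5 , "hello" , 3 )
c.stop               # => 15, end position in the edited file
c.org_stop           # => 8, end position in the original file
c.add?               # => false, from != -1 so this is a copy, not an add
c.org_includes?( 5 ) # => true, original byte 5 lies inside the copied region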
+# a separate class for deleted chunks (for distinction)
+class Deleted < Chunk
+
+  def initialize start , from , string
+    @start = start
+    @length = string.length
+    @str = string
+    @from = from
+    @whitespace = []
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if string.nil?
+  end
+
+  def stop # no length in the modified file
+    @start
+  end
+
+  def split_at_org! pos
+    raise "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
+    left_length = pos - @from
+    right_length = org_stop - pos
+    right = Deleted.new( @start + left_length , pos , @str[left_length, @length] )
+    raise "Back to the right drawingboard #{self} , #{right} :#{pos}" if right.length != right_length
+    @str = @str[0, left_length]
+    @length = @str.length
+    raise "Back to the left drawingboard #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if @str.nil?
+    return right
+  end
+
+  def to_s
+    super.to_s + " deleted"
+  end
+
+end
+
+def main()
+
+  ignore_whitespace = false
+
+  args = ARGV.dup
+
+  if args[0] == "-w"
+    ignore_whitespace = true
+    args.shift
+  end
+  start = File.new( args.shift ).read
+  one = File.new( args.shift ).read
+  two = File.new( args.shift ).read
+
+  result = Merge3::three_way( start , one , two , ignore_whitespace )
+
+  puts result
+
+end
+
+main() if $0 == __FILE__
+
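With main() guarded by `main() if $0 == __FILE__`, the file can also be run
directly; a typical invocation would be (the output redirection is an assumed
convention, not part of the script):

    ruby lib/merge3.rb -w original.txt edited_one.txt edited_two.txt > merged.txt

where -w switches on the whitespace-ignoring mode handled by three_way and the
merged text is printed to standard output.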
metadata CHANGED
@@ -1,11 +1,12 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.8.
+rubygems_version: 0.8.11
 specification_version: 1
 name: merge3
 version: !ruby/object:Gem::Version
-version: "0.
-date:
-summary:
+version: "0.9"
+date: 2006-05-03 00:00:00 +03:00
+summary: "Three way merge, including recognition of moves and edit in moves, unlike
+traditional tools"
 require_paths:
 - lib
 email:
@@ -24,9 +25,12 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
 version: "1.6"
 version:
 platform: ruby
+signing_key:
+cert_chain:
 authors: []
 files:
 - lib/merge3.rb
+- lib/merge3.rb.~1.2.~
 test_files: []
 rdoc_options: []
 extra_rdoc_files: []