RubyGems - merge3 - Versions diffs - 0.8 → 0.9 - Mend

merge3 0.8 → 0.9

Files changed (2) hide show

data/lib/merge3.rb.~1.2.~ +765 -0
metadata +8 -4

@@ -0,0 +1,765 @@
+#
+# merge3.rb   - a 3 way text merging tool
+#
+# Copyright 2004 Helsinki Institute for Information Technology (HIIT)
+# and the authors.  All rights reserved.
+#
+# Authors: Torsten Rueger <torsten@lightning.nu>
+#
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this software and associated documentation files
+# (the "Software"), to deal in the Software without restriction,
+# including without limitation the rights to use, copy, modify, merge,
+# publish, distribute, sublicense, and/or sell copies of the Software,
+# and to permit persons to whom the Software is furnished to do so,
+# subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be
+# included in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# start       :     last week it was really nice weather
+# one (+end)  :     last weekend it was really nice weather
+# two (order) :      it was really nice weather last week
+#
+# merged      :      it was really nice weather last weekend
+# It's a simple example, but getting that merged result is not.
+# it seems the standard diff/patch does not use move or copy as a command
+# as a consequence, simple avoidable conflicts are produced as soon as
+# move + edit touch the same data.
+# this is not only surprising as diff/patch have been around for so long
+# also the algorithms for detecting moves have been around for long:
+# basically there are the two approaches of using add/delete
+# or add/copy (or just copy). Diff/Patch just use the first, whereas the second
+# would allow better patching.
+# now the really interesting thing is that modern diff algorithms I know
+# (xdelta + vcdiff) use copy. As these algorithms are used to work on
+# 2 versions, A,B , producing a diff ab that then may be applied to A and
+# A only, the algorithms use copy not because it's better for 3-way merging,
+# but because it's more efficient.
+# This algorithm is a 3 way merge on text files.
+# It turns out that it merges xml just fine though.
+# A very rough sketch is:
+# -- find the common parts of two files . We do this by a hashing technique
+#    once we have the common (copied) parts we add the added/deleted ones
+# -- common parts are sorted according to both original and edited file order
+# -- merge by going through the list of parts. Start at one file and always
+#    swap to the other when there is a change in the other, a change from
+#    the original files order
+# Note: While this does not concentrate on performance, it is quite reasonable,
+#       meaning linear in file size and quadratic in number of edits.
+#       Performance may be tested with the speedy.rb file in test directory
+#
+DEBUG = false # toggle this to true to get a complete trace (via puts) of what happens
+# so here comes the main algorithm. We have the class to carry the original file
+# and it's hash codes we need for the matching of the files
+class Merge3
+  # make a three way merge, passing in strings and returning a string
+  # strings are byte arrays (not line arrays as in diff)
+  def Merge3::three_way( start_file , one_file , two_file , ignore_white_space = false )
+    # We also create chunks of the whole files, in case whitespace
+    # handling is on. Chunk::subchunk passed compressed whitespace on
+    start_chunk = Chunk.new( 0 , start_file.length , start_file )
+    one_chunk = Chunk.new( 0 , one_file.length , one_file )
+    two_chunk = Chunk.new( 0 , two_file.length , two_file )
+    if ignore_white_space
+      start_chunk.squeeze  # all consecutive whitespace is then squeezed
+      one_chunk.squeeze   # into one, and a talbe create to unsqueeze later
+      two_chunk.squeeze  # Yes, this is expensive
+    end
+    # only create the macthes hash once for the start file
+    start_matches = Merge3::all_matches(start_chunk)
+    # Create the chunks( moves first , then added and deleted)
+    one_seq = Sequence.new( one_chunk , start_chunk , start_matches )
+    two_seq = Sequence.new( two_chunk , start_chunk , start_matches )
+    puts "Diff original 0" , one_seq if DEBUG
+    puts "Diff original 1" , two_seq if DEBUG
+    # break all chunks into non overlapping regions (in the original file order)
+    one_seq.no_overlap( two_seq )
+    two_seq.no_overlap( one_seq )
+    puts "Diff non-overlapping 0" , one_seq if DEBUG
+    puts "Diff non-overlapping 1" , two_seq if DEBUG
+    # and finally (and most simply) do the merge
+    Merge3::merge( one_seq , two_seq  )
+  end
+  # given the Sequences of both files, attempt merging them.
+  # the chunks in the sequences are non-overlapping, so we can always work
+  # on whole chunks.
+  # In a sentence, the algorithm is now: Follow the line of change
+  def Merge3::merge( one_seq , two_seq )
+    # we go along the original file's order and change to the other sequence
+    #  if there has been a change compared to the original file.
+    out = []  # this is the array of string to be outputted, the result
+    # this doesn't consider changes at the beginning of the file
+    # which just goes to prove that we're researchers
+    one_chunk = one_seq.chunks.first
+    two_chunk = two_seq.chunks.first
+    last_choosen  = one_chunk
+    break_flag = nil
+    while break_flag.nil?
+      if DEBUG and one_chunk.from != two_chunk.from
+        #chunks are non overlapping, so they must be the same
+        raise "CHUNKS differ #{one_chunk}\n#{two_chunk}"
+      end
+      # we start with the next in the edited file order and then check where
+      next_one = one_seq.get_next( one_chunk )         # the change is
+      next_two = two_seq.get_next( two_chunk )
+      break_flag = true  if next_one.nil? or next_two.nil? #eof
+      # so if the change is in the first file,
+      if break_flag.nil? and one_chunk.org_stop != next_one.from
+        raise "conflict at #{one_chunk.org_stop}" if two_chunk.org_stop != next_two.from
+        # we find the one in file two that starts where it (one) ends
+        next_two = two_seq.find_from( next_one.from )
+        puts "ONE #{one_chunk}\n    #{two_chunk} nNEXT #{next_one}\n     #{next_two}" if DEBUG
+        next_choosen = next_one
+        # otherwise the change must be in file two
+      elsif break_flag.nil?
+        # and we look for the chunk in one, that starts where two ends
+        next_one = one_seq.find_from( next_two.from )
+        puts "TWO #{two_chunk}\n    #{one_chunk}\nNEXT #{next_two}\n     #{next_one}" if DEBUG
+        next_choosen = next_two
+      else
+        next_one , next_two = nil ,nil
+      end
+      this_del = ( one_chunk.kind_of?(Deleted) or two_chunk.kind_of?(Deleted) )
+      # Output the associated (real==whitespace expanded) string if not deleted
+      out << last_choosen.real_string unless this_del
+      puts "OUT #{Chunk::short_string(one_chunk.real_string)}" if DEBUG and not this_del
+      # we ignore adds (from one file) that happen to fall in the middle of a deleted in the other
+      one_del = ( one_chunk.kind_of?( Deleted) and next_one.kind_of?( Deleted) )
+      two_del = ( two_chunk.kind_of?( Deleted) and next_two.kind_of?( Deleted) )
+      unless one_del or two_del
+        # otherwise we output added text from both files
+        puts "ADD one #{one_chunk.added}" if DEBUG and not one_chunk.added.nil?
+        out << one_chunk.added.real_string  unless one_chunk.added.nil?
+        # and if both have added text create an asymetric result, hmm
+        puts "ADD two #{ two_chunk.added}" if DEBUG  and not two_chunk.added.nil?
+        out << two_chunk.added.real_string unless two_chunk.added.nil?
+      end
+      one_chunk = next_one
+      two_chunk = next_two
+      last_choosen = next_choosen
+    end
+    puts "RESULT" , out.join , "END" if DEBUG
+    return out.join    # joins the pieces and returns the merged string
+  end
+  # build a hash of chunks( hash of the chunks string against the chunk).
+  # This is the backbone of the matching algorithm, so that finding a match
+  # is basically a hash lookup
+  # Matches may be of different sizes, we use 16,32,48 and -1 for line matching
+  def Merge3::matches( chunk , size )
+    if size == -1 then
+      return  Merge3::match_newlines( chunk )
+    else
+      return Merge3::match( chunk , size )
+    end
+  end
+  # collect all matches (for the start file, so it doesn't have to be done twice)
+  def Merge3::all_matches( chunk )
+    # order counts, so large chunks overwrites small for hash collisions
+    matches =  Merge3::match(chunk,16)
+    matches.update(match(chunk,32))
+    matches.update(match(chunk,48))
+    matches.update(match_newlines(chunk))
+    matches
+  end
+  # find matches according to newline seperation.
+  def Merge3::match_newlines( chunk )
+    match = {} # the hash of matches , using the strings as key, chunk as value
+    index = 0 #count through the string length
+    if chunk.whitespace.empty? # whitespace compression on or not ?
+      # because there are only newline if whitespace compression is off
+      chunk.str.split("\n").each do |line|
+        # this logic strips the whitespace off both sides of the line, in an
+        # effort to match things like changed indentation
+        offset = 0
+        offset += 1 while line[offset,1] =~ /\s/
+        index += offset
+        line.strip!
+        cop = chunk.subchunk( index , line.length   , index )
+        match[cop.str] =  cop  # and store in our hash (no collision detection)
+        #puts "LINE #{index} #{line.length} #{offset}--#{cop.str}--"
+        index += line.length + 1  # 1 is the newline
+      end
+    else  #whitespace compression is on, so the whitespace table contains
+      # the newlines, just have to find them
+      chunk.whitespace.each do | at , str |
+        #puts "###{str}##"
+        next unless  str.include?( "\n" )
+        cop = chunk.subchunk( index + 1 , at - index   , index )
+        index =at
+        #puts "###{cop.str}##" if cop.str.include?("Four")
+        match[cop.str] =  cop  # and store in our hash (no collision detection)
+      end
+    end
+    match
+  end
+  # find matches for around the given length. Exactly the length for binary files,
+  # but if there is whitespace, we use that to break within 70% of the given
+  # value
+  def Merge3::match( chunk , around )
+    match = {} # the hash of matches , using the strings as key, chunks as value
+    index = 0 #count through the string length
+    while index < chunk.length  # also possible here to do half from the
+      left = chunk.length - index   #front and half from the back (?)
+      if left > 0.7*around and left <= 1.5*around   # are we at the end
+        len = chunk.length - index
+      else
+        sub = chunk.str[index ,  1.5*around]  # take the maximum length we want
+        if (white = sub.index( /\W/ ,  0.7 * around )).nil? # and check for non char
+          len = around    # either taking the whole
+        else     #or the "words" we found
+          len = white   #hoping this is better for text, so we have no allignment
+        end           # issues
+      end  #then create a chunk with the indexes and string
+      cop = chunk.subchunk( index , len , index )
+      match[cop.str] =  cop  # and store in our hash (no collision detection)
+      index += len
+    end
+    # now we only take one from the back,because that's often a match and will be
+    at = (chunk.length - 0.7*around).to_i
+    cop = chunk.subchunk( at , (0.7*around).to_i , at  )
+    match[cop.str] =  cop
+    match
+  end
+end
+# A Sequence represents a sequence of chunks , in other words the modified file
+# broken into the chunks that were found
+# the sequence keeps the chunks as lists, one in edited file order
+# and one in original file order
+class Sequence
+  attr :chunks      # the list of chunks, sorted in order of the edited file
+  attr :org_chunks  # the list of chunks, sorted in order of the original file
+  # output both lists of chunks, edited and original, with chunks and numbers
+  # but strings only up to 70 chars (for readability)
+  def to_s
+    ret = ""
+    @chunks.each_with_index do | chunk , index |
+      ret += index.to_s + " - " + chunk.to_s + "\n"
+    end
+    @org_chunks.each_with_index do | chunk , index |
+      ret += index.to_s + " - " + chunk.to_s + "\n"
+    end
+    return ret
+  end
+  # find the difference from (file, or byte array) too to start
+  # (matches are precalculated matches for start ) start.
+  # Collect the differences as chunks (copies or adds) and mark deletes
+  # The algorithm is "greedy", meaning if it finds a match (using the
+  # matches) it tries to extend in both directions
+  def initialize too_chunk , start_chunk , start_matches
+    @chunks = []
+    @org_chunks = []
+    # run by length to get big (sure) hits first (-1 == newlines)
+    [ -1 , 48 , 32, 16 ].each  do |c_size|
+      two_matches = Merge3::matches(too_chunk , c_size).each do |key , two|
+        next if   (start = start_matches[two.str]).nil?
+        raise "Keys collide :#{start}"   if  start.str != two.str
+        next if done?( two , start )
+        # here comes the greedy part, first left then right
+        two_start , start_start , len  = two.start , start.start , two.length
+        puts two , "MINUS" if len <=  0 and DEBUG
+        start_rim , edit_rim = find_left_rim( start_start , two_start )
+        while ( start_chunk.str[start_start - 1] == too_chunk.str[two_start - 1] ) and
+            (  edit_rim < two_start  )  and (  start_rim < start_start )
+          two_start -= 1
+          start_start -= 1
+          len += 1 # to keep the right end where it was
+        end
+        start_rim , edit_rim = find_right_rim( start_start + len , two_start + len , start_chunk.length , too_chunk.length )
+        while ( start_chunk.str[start_start + len  ] == too_chunk.str[two_start + len ] ) and
+            ((len + start_start) < start_rim)  and ((two_start + len) < edit_rim)
+          len += 1
+        end
+        chunk =  too_chunk.subchunk(two_start,len , start_start )
+        puts "Matched #{chunk}" if DEBUG
+        add_chunk( chunk )
+      end
+    end
+    # now find the parts that were deleted
+    #  (gaps in the matching of the original file)
+    each_org_pair do | org , next_org |
+      if org.org_stop < next_org.from
+        del_str = start_chunk.str[org.org_stop , next_org.from  - org.org_stop ]
+        puts "DELETED #{org.org_stop} --#{del_str}--" if DEBUG
+        del = Deleted.new( org.stop , org.org_stop , del_str)
+        add_chunk( del )
+      end
+    end
+    # now find the parts that were added (gaps in the matching of the edited file)
+    adds = {}
+    each_pair do |chunk , next_chunk |
+      if chunk.stop < next_chunk.start
+        add = too_chunk.subchunk( chunk.stop , next_chunk.start - chunk.stop , -1 )
+        puts "ADDING  #{add} "  if DEBUG
+        chunk.added =  add   # hang the add onto the chunk
+        # this following logic has fixed some cases, where the matching had
+        # been somewhat unintuitive. Though correct it produced
+        # unneccessary conflicts, in conjunction with deletes
+        while add.str[0] == next_chunk.str[0]  and
+            add.str[0] != 32 and    # not spaces, avoid whitespace headaches
+            chunk.org_stop == next_chunk.from
+          puts "before: #{chunk} \nNext:#{next_chunk}"  if DEBUG
+          add.rotate  #put the first to the end
+          remove_chunk( next_chunk )
+          # move the first from the next to the end of the last
+          chunk.push( next_chunk.pop )
+          puts "after: #{chunk} \nNext:#{next_chunk}"  if DEBUG
+          add_chunk( next_chunk )
+        end unless chunk.kind_of?(Deleted) or next_chunk.kind_of?(Deleted)
+      end
+    end
+  end
+  # helper to iterate over each pair in the edited file order
+  def each_pair
+    return if @chunks.length < 2
+    idx = 1
+    chunk = @chunks[0]
+    while idx < @chunks.length
+      next_chunk = @chunks[idx]
+      yield(chunk , next_chunk)
+      chunk = next_chunk
+      idx = idx + 1
+    end
+  end
+  # helper to iterate over each pair in the original file order
+  def each_org_pair
+    return if @org_chunks.length < 2
+    idx = 1
+    chunk = @org_chunks[0]
+    while idx < @org_chunks.length
+      next_chunk = @org_chunks[idx]
+      yield(chunk , next_chunk)
+      chunk = next_chunk
+      idx = idx + 1
+    end
+  end
+  # get the next chunk in edited file order
+  def get_next( prev )
+    idx = @chunks.index prev
+    return nil unless idx
+    return nil if idx == @chunks.length - 1
+    nekst = @chunks[idx + 1 ]
+    return nekst
+  end
+  # add this chunk. This implementation keeps the order of both
+  # arrays correct all the time
+  def add_chunk( chunk )
+    raise "Nil added " if not chunk
+    raise "False added " if  chunk == false
+    @chunks.push( chunk )
+    @chunks.sort! {  |a , b |
+      ret = a.start <=> b.start
+      # so this adds the Deleted chunks into the edited file too
+      if ret == 0
+        ret =  -1 if a.kind_of? Deleted
+        ret =  1 if b.kind_of? Deleted
+      end
+      ret
+    }
+    @org_chunks.push(chunk)
+    @org_chunks.sort! do  |a , b | a.from <=> b.from end
+  end
+  def remove_chunk( chunk ) # for rotation, will be readded
+    @chunks.delete(chunk)
+    @org_chunks.delete(chunk)
+  end
+  #find the chunk that starts at the given position in the original file
+  def find_from pos
+    each_org do | chunk |
+      #puts "FOUND for #{pos} #{chunk}"
+      return chunk if chunk.from == pos
+    end
+    raise "Not found #{pos} in\n#{self}"
+    # hmmm this was here for some reason I don't recall
+    # return chunk.next if chunk.added and chunk.added.start == pos
+  end
+  # find the chunk left to these positions (original and edited) and return
+  # the maximum bound a new chunk can expand too in both coordinates.
+  def find_left_rim org , pos
+    start_rim , two_rim = 0 ,0
+    each_org do | two |
+      start_rim = two.org_stop if start_rim  <= two.org_stop and two.org_stop <= org
+    end
+    each do | one |
+      two_rim = one.stop if  two_rim <= one.stop and one.stop <= pos
+    end
+    #puts "Left rim #{org} #{pos} is #{start_rim} #{two_rim}" if DEBUG
+    return start_rim , two_rim
+  end
+  # find the chunk right to these positions (original and edited) and return
+  # the maximum bound a new chunk can expand too in both coordinates.
+  def find_right_rim start , two , start_start , two_start
+    start_rim , two_rim = start_start , two_start
+    each_org do | tw |
+      start_rim = tw.from if start <= tw.from and tw.from <= start_rim
+    end
+    each do | one |
+      two_rim = one.start if two <= one.start  and one.start <= two_rim
+    end
+    #puts "\Right rim #{start} #{two} is #{start_rim} #{two_rim}" if DEBUG
+    return start_rim , two_rim
+  end
+  # that part is done if the are (start-stop) is already included in the
+  # patches (first if) or the org part overlaps any of the patches org's
+  # the second case is then a copy
+  def done? two , org
+    #puts "checking done? \n #{two} \n #{org} "
+    each do |chunk|
+      return true if chunk.overlaps? two
+    end
+    each_org do |chunk|
+      return true if chunk.org_overlaps? org
+    end
+    #puts "NOT DONE? : \n #{self}"
+    false
+  end
+  # yields all chunks in the sequence in edited file order
+  def each
+    @chunks.each do |chunk|
+      yield chunk if chunk
+    end
+  end
+  # yields all chunks in original file order
+  def each_org
+    @org_chunks.each do |chunk|
+      yield chunk if chunk
+    end
+  end
+  # change all chunks in this sequence so that they don't have overlaps with
+  # any of the chunks in the given sequence. done both ways results in
+  # non-overlapping sequences
+  def no_overlap sequence
+    each_org do |one|
+      sequence.each_org do |two|
+        break_at( one , two.from )     if one.org_includes?( two.from )
+        break_at( one , two.org_stop ) if one.org_includes?( two.org_stop )
+      end
+    end
+  end
+  # split in two at pos (thus assuming pos is included) and link the
+  def break_at( chunk , pos )
+    return if pos == chunk.from or pos == chunk.org_stop
+    old_s = chunk.to_s
+    new_chunk = chunk.split_at_org!(pos)
+    if not new_chunk.kind_of? Deleted
+      new_chunk.added = chunk.added
+      chunk.added = nil
+    end
+    #puts "SPLIT  #{pos}\nBEFORE #{old_s}\nOLD:#{chunk} \nNEW:#{new_chunk} \n" if DEBUG
+    idx = @chunks.index(chunk)
+    @chunks[idx+1,0] = new_chunk
+    idx = @org_chunks.index(chunk)
+    @org_chunks[idx+1,0] = new_chunk
+  end
+end
+# this is  a little holder class (struct?) that represents a piece of text in
+# a file: the string, where it is in the edited file and where (if at all)
+# it came from in the original file.
+class Chunk
+  # the byte index into the file where the string is in (the edited file)
+  attr_reader :start
+  #length of the string (somewhat redundant with the string below)
+  attr_reader :length
+  # the actual string. This is just here for convenience, it's in the file
+  attr_reader :str
+  # the index into the original file where the string starts, -1 for adds
+  attr_reader :from
+  attr_accessor :added   # a chunk that was added (if there is such a one)
+  attr_accessor :whitespace # the table for whitespace, [ index , string ] pairs
+  # cut the chunk to max 71 characters for readable output
+  def Chunk::short_string( s )
+    if s.length < 71
+      return "--" + s.to_s + "--"
+    else
+      return "--"  +  s[0,20] + "...Cut #{s.length-40} chars..." + s[s.length - 20 , 20] + "--"
+    end
+  end
+  # start and length are in the coordinates of the edited file, from is the
+  # start in the original file (-1 for text that is not a copy).
+  # Raises a RuntimeError if string is nil
+  def initialize( start , length , string , from = -1)
+    raise "Error nil string passed start=#{start} length=#{length} str=-#{str}=" if string.nil?
+    @start , @length ,  @str , @from = start , length , string , from
+    @whitespace = []
+  end
+  # outputs all attributes and a readable version of the string
+  # also whitespace if present (only the first few entries)
+  def to_s  #for debug output
+    st = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} "
+    st += Chunk::short_string(real_string)
+    st += " \n  added=#{added} " unless added.nil?
+    st += " \n  whitespace=#{@whitespace.length} " unless @whitespace.empty?
+    ws_count = 0
+    @whitespace.each do | start , s |
+      st += "-" + start.to_s + " " + s.length.to_s + " "
+      st += "n " if s.include?( "\n" )
+      ws_count += 1
+      break if ws_count  > 5
+    end
+    st
+  end
+  # put the first character at the end (and move the start one to the right)
+  def rotate
+    @str = @str[1..-1] + @str[0,1]
+    @start += 1
+  end
+  # puts the  char at the end of the string, adjusting the length
+  # add the whitespace if it's not nil
+  # arg is the [ char , whitespace ] pair that pop returns
+  def push( arg )
+    char , space = arg
+    @str << char
+    @length += 1
+    @whitespace << space unless space.nil?
+  end
+  # return the first char, and remove it from the string, adjusting length + start
+  # also return any whitespace entry, if there is such a thing
+  def pop
+    white = nil
+    if (not @whitespace.empty?)  and  (@whitespace.first[0] == @start  )
+      white = @whitespace.delete_at(0)
+      # trace only when debugging, the merge result is written to stdout too
+      puts "White --#{white}--#{@start}" if DEBUG
+    end
+    char = @str[0]
+    @str = @str[1..-1]
+    @length  -= 1
+    @start += 1
+    @from += 1 if @from != -1
+    return [ char , white ]
+  end
+  # squeeze the whitespace, ie for all sequences of whitespace(space,tab,newline),
+  # and all non space whitespaces, replace it with a single whitespace and record
+  # the change in the array
+  def squeeze
+    old = @str
+    @str = old.dup   # squeeze a copy, the string that was passed in stays as it is
+    @whitespace = []
+    at = 0
+    while ( index =  @str.index(/\s+/ , at ) )
+      if $& != ' '  # do nothing for single spaces
+        @whitespace.push [ index , $&  ]
+        @str[index , $&.length] = ' '
+      end
+      # whatever was found is a single character now, so we go on right
+      # behind it. (going on behind the unsqueezed run would skip as many
+      # characters as were just removed, and any whitespace in them)
+      at = index + 1
+    end
+    @length = @str.length
+    raise "squeeze can not be undone for #{Chunk::short_string(old)}" if DEBUG and old != real_string
+  end
+  # unsqueeze whitespace if present
+  def real_string
+    return @str if @whitespace.empty?
+    stri = @str.dup
+    #puts "--" + str + "--" + @str.length.to_s
+    begin
+      # from the back, so the indexes further to the front stay valid
+      @whitespace.reverse.each do | index , st |
+        stri[index - @start ,1] = st
+      end
+    rescue
+      # print what we have, to make the failure traceable, and fail anyway
+      st = "start=#{start} len=#{length} stop=#{stop} from=#{from} org_stop=#{org_stop} "   + @str
+      st += " \n  whitespace=#{@whitespace.length} "
+      @whitespace.each do | start , s |
+        st += "-" + start.to_s + " " + s.length.to_s + " "
+        st += "n " if s.include?( "\n" )
+      end
+      puts st
+      raise
+    end
+    stri
+  end
+  # get a substring from the chunk and carry whitespace table across
+  # start is in the files coordinates (not the strings of the chunk)
+  # fromm becomes the from (original file index) of the new chunk
+  def subchunk( startt  , len , fromm )
+    stri = @str[ startt - @start , len ]
+    table = []
+    @whitespace.each do | arr |
+      table << arr if arr[0] >= startt and arr[0]  < ( startt + len )
+    end
+    chunk = Chunk.new( startt , len , stri , fromm )
+    chunk.whitespace = table
+    #puts "SUB at #{startt} #{len} #{chunk}" if fromm == 62
+    chunk
+  end
+  # index of where the string ends.
+  # this should be called end, but that's a keyword.
+  def stop
+    @start + @length
+  end
+  # index of where the string ends in the original file (nonsense for adds)
+  def org_stop
+    @from + @length
+  end
+  def add?  # adds are not copied, hence have no from attribute.
+    @from == -1  #   a copy carries a positive number as offset into the original
+  end
+  # is the point (char index) in the original copied string
+  def org_includes? point
+    ( @from != -1 ) and ( @from <= point ) and ( point < org_stop )
+  end
+  def includes_copy? copy  # is the copy chunk fully inside this
+    (copy.start >= @start) and (copy.stop <= stop)
+  end
+  # return a substring in the original files coordinates, fro is the first
+  # index and to the one behind the last (-1 meaning up to the end)
+  # Raises a RuntimeError if the range is not inside this chunk
+  def rstring( fro , to )
+    to = org_stop if to == -1
+    raise "error #{fro} #{to} #{self}" if fro < @from or to > org_stop
+    # NOTE(review): the length is reduced though the string itself is kept,
+    # which looks unintended for a reader. Nothing in this file calls
+    # rstring, so it is left as it was - confirm before relying on it
+    @length -= to - fro
+    @str[ fro - @from , to - fro ]  # second argument is a length, not an index
+  end
+  # does some part of two overlap. always takes me 5 minutes to get
+  def overlaps? two           # but a drawing helps
+    start > two.start ?  start < two.stop : stop > two.start
+  end
+  #does any part of the original overlap (from - org_stop )
+  def org_overlaps? two
+    from > two.from ?  from < two.org_stop : org_stop > two.from
+  end
+  # this joins two chunks, which are presumed to be adds (so no fiddling with from)
+  # self is changed and the argument untouched (to be deleted)
+  def join_adds add
+    @str += add.str
+    @length = @str.length
+  end
+  #split the chunk into two at the given position (in the original file),
+  # change this to be the left part and return the right one
+  # Raises a RuntimeError if pos is not inside the chunk
+  def split_at_org! pos
+    #puts "SPLIT #{pos}  #{self}"
+    raise "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
+    left_length = pos - @from
+    right_length = org_stop - pos
+    right = Chunk.new( @start + left_length , right_length , @str[left_length, right_length] , pos )
+    # the whitespace table is divided at the start of the right chunk
+    right.whitespace = @whitespace.dup.delete_if do | index , s |
+      index < right.start
+    end
+    #puts " split #{right} "
+    raise "Right split error #{self} , #{right} :#{pos}" if right.length != right_length
+    @str = @str[0, left_length]
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if @str.nil?
+    @length = @str.length
+    @whitespace.delete_if do | index , s |
+      index >= right.start
+    end
+    #puts " white #{@whitespace} "
+    #puts " left #{self} "
+    raise "Left split error #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
+    return right
+  end
+end
+# a seperate class for deleted chunks (for destinction)
+class Deleted < Chunk
+  def initialize start , from , string
+    @start = start
+    @length = string.length
+    @str = string
+    @from = from
+    @whitespace = []
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if string.nil?
+  end
+  def stop # no length in the modified file
+    @start
+  end
+  def split_at_org! pos
+    raise "position not in chunk #{pos} : #{self}" unless org_includes?( pos )
+    left_length = pos - @from
+    right_length = org_stop - pos
+    right = Deleted.new( @start + left_length , pos , @str[left_length, @length]  )
+    raise "Back to the right drawingboard #{self} , #{right} :#{pos}" if right.length != right_length
+    @str = @str[0, left_length]
+    @length = @str.length
+    raise "Back to the left drawingboard #{self} , #{right} :#{pos} :#{left_length}" if @length != left_length
+    raise "Error nil string =#{start} length=#{length} str=-#{@str}=" if @str.nil?
+    return right
+  end
+  def to_s
+    super.to_s + " deleted"
+  end
+end
+def main()
+  ignore_whitespace = false
+  args = ARGV.dup
+  if args[0] == "-w"
+    ignore_whitespace = true
+    args.shift
+  end
+  start = File.new( args.shift ).read
+  one = File.new( args.shift ).read
+  two = File.new( args.shift ).read
+  result = Merge3::three_way( start , one , two , ignore_whitespace  )
+  puts result
+end
+main() if $0 == __FILE__

metadata CHANGED

@@ -1,11 +1,12 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.8.3
+rubygems_version: 0.8.11
 specification_version: 1
 name: merge3
 version: !ruby/object:Gem::Version
-  version: "0.8"
-date: 2004-12-22
-summary: This gem demonstrates executable scripts
+  version: "0.9"
+date: 2006-05-03 00:00:00 +03:00
+summary: "Three way merge, including recognition of moves and edit in moves, unlike
+  traditional tools"
 require_paths:
   - lib
 email:
@@ -24,9 +25,12 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
         version: "1.6"
   version:
 platform: ruby
+signing_key:
+cert_chain:
 authors: []
 files:
   - lib/merge3.rb
+  - lib/merge3.rb.~1.2.~
 test_files: []
 rdoc_options: []
 extra_rdoc_files: []