RubyGems - amp-git - Versions diffs - 0.1.0 - Mend

amp-git 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

data/.document +5 -0
data/.gitignore +23 -0
data/Gemfile +15 -0
data/LICENSE +20 -0
data/README.rdoc +17 -0
data/Rakefile +68 -0
data/VERSION +1 -0
data/features/amp-git.feature +9 -0
data/features/step_definitions/amp-git_steps.rb +0 -0
data/features/support/env.rb +4 -0
data/lib/amp-git/encoding/binary_delta.rb +171 -0
data/lib/amp-git/repo_format/changeset.rb +348 -0
data/lib/amp-git/repo_format/commit_object.rb +87 -0
data/lib/amp-git/repo_format/index.rb +169 -0
data/lib/amp-git/repo_format/loose_object.rb +78 -0
data/lib/amp-git/repo_format/packfile.rb +263 -0
data/lib/amp-git/repo_format/packfile_index.rb +196 -0
data/lib/amp-git/repo_format/raw_object.rb +56 -0
data/lib/amp-git/repo_format/staging_area.rb +215 -0
data/lib/amp-git/repo_format/tag_object.rb +87 -0
data/lib/amp-git/repo_format/tree_object.rb +98 -0
data/lib/amp-git/repo_format/versioned_file.rb +133 -0
data/lib/amp-git/repositories/local_repository.rb +192 -0
data/lib/amp-git/repository.rb +57 -0
data/lib/amp-git.rb +49 -0
data/lib/amp_plugin.rb +1 -0
data/spec/amp-git_spec.rb +15 -0
data/spec/repository_spec.rb +74 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +29 -0
data/test/index_tests/index +0 -0
data/test/index_tests/test_helper.rb +16 -0
data/test/index_tests/test_index.rb +69 -0
data/test/packfile_tests/hasindex.idx +0 -0
data/test/packfile_tests/hasindex.pack +0 -0
data/test/packfile_tests/pack-4e1941122fd346526b0a3eee2d92f3277a0092cd.pack +0 -0
data/test/packfile_tests/pack-d23ff2538f970371144ae7182c28730b11eb37c1.idx +0 -0
data/test/packfile_tests/test_helper.rb +16 -0
data/test/packfile_tests/test_packfile.rb +75 -0
data/test/packfile_tests/test_packfile_index_v2.rb +90 -0
data/test/packfile_tests/test_packfile_with_index.rb +76 -0
data/test/test_commit_object.rb +60 -0
data/test/test_git_delta.rb +67 -0
data/test/test_helper.rb +71 -0
data/test/test_loose_object.rb +51 -0
data/test/test_tag_object.rb +72 -0
data/test/test_tree_object.rb +55 -0
metadata +215 -0

data/lib/amp-git/repo_format/commit_object.rb ADDED Viewed

@@ -0,0 +1,87 @@
+##################################################################
+#                  Licensing Information                         #
+#                                                                #
+#  The following code is licensed, as standalone code, under     #
+#  the Ruby License, unless otherwise directed within the code.  #
+#                                                                #
+#  For information on the license of this code when distributed  #
+#  with and used in conjunction with the other modules in the    #
+#  Amp project, please see the root-level LICENSE file.          #
+#                                                                #
+#  © Michael J. Edgar and Ari Brown, 2009-2010                   #
+#                                                                #
+##################################################################
+# This was written by reading the Git Book. No source code was
+# examined to produce this code. It is the original work of its
+# creators, Michael Edgar and Ari Brown.
+#
+# http://book.git-scm.com/7_how_git_stores_objects.html
+module Amp
+  module Core
+    module Repositories
+      module Git
+        ##
+        # = CommitObject
+        #
+        # This is a commit object in the git system. This contains a reference to
+        # a tree, one or more parents, an author, a committer, and a message. This
+        # object is all you need to know everything about a commit.
+        class CommitObject < RawObject
+          attr_reader :tree_ref, :parent_refs, :author, :committer, :date, :message
+          ##
+          # Initializes the CommitObject. Needs a hash to identify it and
+          # an opener. The opener should point to the .git directory. Immediately
+          # parses the object.
+          #
+          # @param [String] hsh the hash to use to find the object
+          # @param [Support::RootedOpener] opener the opener to use to open the
+          #   object file
+          # @param [String] content if the content is known already, use
+          #   the provided content instead
+          def initialize(hsh, opener, content = nil)
+            if content
+              @hash_id, @opener = hsh, opener
+              @type = 'commit'
+              @content = content
+            else
+              super(hsh, opener)
+            end
+            @parent_refs = []
+            parse!
+          end
+          private
+          ##
+          # Parses the commit object into our attributes.
+          def parse!
+            lines = @content.split("\n")
+            last_idx = 0
+            lines.each_with_index do |line, idx|
+              case line
+              when /^tree (.{40})/
+                @tree_ref = Support::StringUtils.unhexlify($1)
+              when /^parent (.{40})/
+                @parent_refs << Support::StringUtils.unhexlify($1)
+              when /^author #{AUTHOR_MATCH}/
+                @author = "#{$1} <#{$2}>"
+                @date = Time.at($3.to_i)
+              when /^committer #{AUTHOR_MATCH}/
+                @committer = "#{$1} <#{$2}>"
+                @date = Time.at($3.to_i)
+              when ""
+                last_idx = idx + 1
+                break
+              end
+            end
+            @message = lines[last_idx..-1].join("\n")
+          end
+        end
+      end
+    end
+  end
+end

data/lib/amp-git/repo_format/index.rb ADDED Viewed

@@ -0,0 +1,169 @@
+##################################################################
+#                  Licensing Information                         #
+#                                                                #
+#  The following code is licensed, as standalone code, under     #
+#  the Ruby License, unless otherwise directed within the code.  #
+#                                                                #
+#  For information on the license of this code when distributed  #
+#  with and used in conjunction with the other modules in the    #
+#  Amp project, please see the root-level LICENSE file.          #
+#                                                                #
+#  © Michael J. Edgar and Ari Brown, 2009-2010                   #
+#                                                                #
+##################################################################
+# This was written by reading the Git Book. No source code was
+# examined to produce this code. It is the original work of its
+# creators, Michael Edgar and Ari Brown.
+#
+# http://book.git-scm.com/7_the_packfile.html
+# http://repo.or.cz/w/git.git?a=blob;f=Documentation/technical/pack-format.txt;h=1803e64e465fa4f8f0fe520fc0fd95d0c9def5bd;hb=HEAD
+# http://git.rsbx.net/Documents/Git_Data_Formats.txt
+module Amp
+  module Core
+    module Repositories
+      module Git
+        ##
+        # = Index
+        #
+        # The Index is essentially a cache of the working directory. It tracks
+        # which files have been added to the staging area and which have not, and
+        # can be used to check if a file has been modified or not. It is a relatively
+        # complex binary format and there are two versions of it we also have to
+        # support.
+        module Index
+          class IndexParseError < StandardError; end
+          ##
+          # Parses the given file as an Index, and returns the appropriate subclass of Index.
+          # There are two versions that are supported and each needs to be able to handle
+          # status lookups and so on.
+          #
+          # @param [String] file the name of the file to open
+          # @param [Support::RootedOpener] opener an opener to scope the opening of files
+          # @return [AbstractIndex] the index subclass this file represents
+          def self.parse(file, opener)
+            opener.open(file, "r") do |fp|
+              if fp.read(4) != "DIRC"
+                raise IndexParseError.new("#{file} is not an index file.")
+              end
+              version = fp.read(4).unpack("N").first
+              case version
+              when 1
+                IndexVersion1.new(fp)
+              when 2
+                IndexVersion2.new(fp)
+              end
+            end
+          end
+          ##
+          # The format of each index entry is as follows:
+          #   create_time, 32-bits # in seconds, least-significant bits if rollover
+          #   create_time_nanoseconds, 32-bits
+          #   modify_time, 32-bits # in seconds, least-significant bits if rollover
+          #   modify_time_nanoseconds, 32-bits
+          #   device, 32-bits # device id
+          #   inode, 32-bits # inode from the filesystem
+          #   mode, 32-bits # permissions/mode from the FS
+          #   uid, 32-bits # user ID from the FS
+          #   gid, 32-bits # group ID from the FS
+          #   size, 32-bits # filesize, least-significant-bits, from FS
+          #   hash_id, 20 bytes # sha-1 hash of the data
+          #   assume_valid, 1 bit # flag for whether this file should be assumed to be unchanged
+          #   update_needed, 1 bit # flag saying the file needs to be refreshed
+          #   stage, 2 bits # two flags used for merging
+          #   filename_size, 12 bits # the size of the upcoming filename in bytes
+          #   filename, N bytes # the name of the file in the index
+          #   padding, N bytes # null padding. At least 1 byte, enough to make the block's size a
+          #     multiple of 8 bytes
+          #
+          #   This class is a big effing struct for this.
+          class IndexEntry < Struct.new(:ctime, :ctime_ns, :mtime, :mtime_ns, :dev, :inode, :mode, :uid, :gid, :size,
+                                        :hash_id, :assume_valid, :update_needed, :stage, :name)
+            ENTRY_HEADER_FORMAT = "NNNNNNNNNNa20n"
+            ENTRY_HEADER_SIZE   = 62
+            def initialize(*args)
+              if args.size > 0 && args[0].kind_of?(IO)
+                fp = args.first
+                header = fp.read(ENTRY_HEADER_SIZE).unpack(ENTRY_HEADER_FORMAT)
+                self.ctime, self.ctime_ns, self.mtime, self.mtime_ns, self.dev, self.inode,
+                            self.mode, self.uid, self.gid, self.size, self.hash_id, flags = header
+                self.assume_valid  = flags & 0x8000 > 0
+                self.update_needed = flags & 0x4000 > 0
+                self.stage  = (flags & 0x3000) >> 12
+                namesize = flags & 0x0FFF
+                self.name = fp.read(namesize)
+                mod = (ENTRY_HEADER_SIZE + namesize) & 0x7
+                padding_len = mod == 0 ? 8 : 8 - mod
+                fp.read(padding_len)
+              else
+                super
+              end
+            end
+          end
+          ##
+          # Generic Index class, handles common initialization and generic methods
+          # that aren't different between different versions of the index
+          class AbstractIndex
+            def initialize(fp)
+              @entry_map = {}
+              @entry_count = fp.read(4).unpack("N").first
+            end
+            ##
+            # @return [Integer] the number of entries in the Index.
+            def size
+              @entry_count
+            end
+            ##
+            # Returns an IndexEntry for the file with the given name.
+            # Returns nil on failure, and this should not be used by end-users
+            #
+            # @param [String] name the name of the object/file to look up
+            # @return [IndexEntry, NilClass] the entry with the given name, or nil
+            def [](name)
+              @entry_map[name]
+            end
+            def read_entries(fp)
+              @entries = []
+              @entry_count.times do
+                new_entry = IndexEntry.new(fp)
+                @entries << new_entry
+                @entry_map[new_entry.name] = new_entry
+              end
+            end
+            def inspect
+              "<Git Index, entries: #{@entry_count}>"
+            end
+          end
+          ##
+          # Older version of the index. Not used anymore by git.
+          class IndexVersion1 < AbstractIndex
+            def initialize(fp)
+              super
+              @checksum = fp.read(20)
+              read_entries(fp)
+            end
+          end
+          ##
+          # Newer version of the index - default format of the index.
+          class IndexVersion2 < AbstractIndex
+            def initialize(fp)
+              super
+              read_entries(fp)
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/amp-git/repo_format/loose_object.rb ADDED Viewed

@@ -0,0 +1,78 @@
+##################################################################
+#                  Licensing Information                         #
+#                                                                #
+#  The following code is licensed, as standalone code, under     #
+#  the Ruby License, unless otherwise directed within the code.  #
+#                                                                #
+#  For information on the license of this code when distributed  #
+#  with and used in conjunction with the other modules in the    #
+#  Amp project, please see the root-level LICENSE file.          #
+#                                                                #
+#  © Michael J. Edgar and Ari Brown, 2009-2010                   #
+#                                                                #
+##################################################################
+# This was written by reading the Git Book. No source code was
+# examined to produce this code. It is the original work of its
+# creators, Michael Edgar and Ari Brown.
+#
+# http://book.git-scm.com/7_how_git_stores_objects.html
+module Amp
+  module Core
+    module Repositories
+      module Git
+        ##
+        # = LooseObject
+        #
+        # A single loose object (tree, tag, commit, etc.) in the Git system.
+        # Its type and content will be determined after we read the file.
+        #
+        # It is uniquely identified by a SHA1 hash.
+        class LooseObject < RawObject
+          class << self
+            def lookup(hsh, opener)
+              require 'scanf'
+              path = File.join("objects", hsh[0..1], hsh[2..40])
+              mode = "r"
+              type, content = nil, nil
+              begin
+                opener.open(path, mode) do |fp|
+                  type, content_size = fp.scanf("%s %d")
+                  fp.seek(type.size + 1 + content_size.to_s.size + 1, IO::SEEK_SET)
+                  content = fp.read(content_size)
+                end
+              rescue SystemCallError
+                if create
+                  FileUtils.mkdir_p(opener.join("objects", hsh[0..1]))
+                  mode = "w+"
+                  retry
+                else
+                  raise
+                end
+              end
+              RawObject.construct(hsh, opener, type, content)
+            end
+          end
+          attr_accessor :type
+          ##
+          # Initializes the RawObject. Needs a hash to identify it and
+          # an opener. The opener should point to the .git directory.
+          #
+          # @param [String] hsh the hash to use to find the object
+          # @param [Support::RootedOpener] opener the opener to use to open the
+          #   object file
+          def initialize(hsh, opener, content = nil)
+            @hash_id, @opener, @content = hsh, opener, content
+          end
+        end
+      end
+    end
+  end
+end

data/lib/amp-git/repo_format/packfile.rb ADDED Viewed

@@ -0,0 +1,263 @@
+##################################################################
+#                  Licensing Information                         #
+#                                                                #
+#  The following code is licensed, as standalone code, under     #
+#  the Ruby License, unless otherwise directed within the code.  #
+#                                                                #
+#  For information on the license of this code when distributed  #
+#  with and used in conjunction with the other modules in the    #
+#  Amp project, please see the root-level LICENSE file.          #
+#                                                                #
+#  © Michael J. Edgar and Ari Brown, 2009-2010                   #
+#                                                                #
+##################################################################
+# This was written by reading the Git Book. No source code was
+# examined to produce this code. It is the original work of its
+# creators, Michael Edgar and Ari Brown.
+#
+# http://book.git-scm.com/7_the_packfile.html
+# http://repo.or.cz/w/git.git?a=blob;f=Documentation/technical/pack-format.txt;h=1803e64e465fa4f8f0fe520fc0fd95d0c9def5bd;hb=HEAD
+# http://git.rsbx.net/Documents/Git_Data_Formats.txt
+module Amp
+  module Core
+    module Repositories
+      module Git
+        ##
+        # = PackFile
+        #
+        # Git uses its "gc" command to pack loose objects into PackFiles.
+        # This is one file, preferably with an index (though not requiring one),
+        # which stores a number of objects in a very simple raw form.
+        #
+        # The index is *not* necessary. It is simply preferable because otherwise
+        # you have to uncompress each object in a row, then calculate the hash of
+        # the object, just to find out where each object is and what its hash is.
+        class PackFile
+          include Amp::Core::Support
+          ##
+          # A single entry in a packfile. Dumb struct. However, it has some smart
+          # class methods for parsing these bad boys in from a packfile. Take a
+          # look at {#at} and {#read}.
+          class PackFileEntry < Struct.new(:type, :size, :content, :hash_id, :reference, :offset, :delta_offset)
+            include Amp::Core::Support
+            class << self
+              ##
+              # Reads a {PackFileEntry} from the given file at a given offset.
+              # This is a helper method for the entry point for reading from an actual PackFile,
+              # since often, you'll know where the entry will be located in the PackFile.
+              #
+              # @param [IO, #read] fp the file to read from
+              # @return [PackFileEntry] an entry, decompressed and with its hash calculated.
+              def at(fp, pos)
+                fp.seek pos, IO::SEEK_SET
+                read fp
+              end
+              ##
+              # Reads a {PackFileEntry} from the given file. This is the entry point for
+              # reading from an actual PackFile.
+              #
+              # @param [IO, #read] fp the file to read from
+              # @return [PackFileEntry] an entry, decompressed and with its hash calculated.
+              def read(fp)
+                result = PackFileEntry.new
+                result.offset = fp.pos
+                result.type, result.size = read_header(fp)
+                if result.type == OBJ_REF_DELTA
+                  result.reference = fp.read(20)
+                elsif result.type == OBJ_OFS_DELTA
+                  result.delta_offset = result.offset - read_offset(fp)
+                end
+                result.content = read_data(fp, result.size)
+                if result.type == OBJ_REF_DELTA
+                elsif result.type == OBJ_OFS_DELTA
+                  cur = fp.tell
+                  patch = Amp::Core::Repositories::Git::Encoding::BinaryDelta.new(result.content)
+                  previous = self.at(fp, result.delta_offset)
+                  result.content = patch.apply(previous.content)
+                  result.size = result.content.size
+                  result.type = previous.type
+                  fp.seek(cur, IO::SEEK_SET)
+                end
+                result.calculate_hash!
+                result
+              end
+              ##
+              # Reads an OBJ_OFS_DELTA offset. N-bytes, encoded as a series of
+              # bytes.  Each byte is shifted by (7 * n) bits, and added to the
+              # total. If the high bit (MSB) of a byte is 1, then another byte is
+              # read, If it's 0, it stops.
+              #
+              # @param [IO, #read] fp the IO stream to read from
+              # @return [Integer] the offset read
+              def read_offset(fp)
+                byte = Support::StringUtils.ord(fp.read(1))
+                tot = byte & 0x7f
+                while (byte & 0x80) > 0
+                  byte = Support::StringUtils.ord(fp.read(1))
+                  tot = ((tot + 1) << 7) | (byte & 0x7f)
+                  break if (byte & 0x80) == 0
+                end
+                tot
+              end
+              ##
+              # Reads in a PackFileEntry header from the file. This will get us the
+              # type of the entry, as well as the size of its uncompressed data.
+              #
+              # @param [IO, #read] fp the file to read the header from
+              # @return [Array(Integer, Integer)] the type and size of the entry packed
+              #   into a tuple.
+              def read_header(fp)
+                tags = Support::StringUtils.ord(fp.read(1))
+                type = (tags & 0x70) >> 4
+                size = tags & 0xF
+                shift = 4
+                while tags & 0x80 > 0
+                  tags = Support::StringUtils.ord(fp.read(1))
+                  size += (tags & 0x7F) << shift
+                  shift += 7
+                end
+                [type, size]
+              end
+              ##
+              # Reads data from the file, uncompressing along the way, until +size+ bytes
+              # have been decompressed. Since we don't know how much that will be ahead of time,
+              # this is annoying slow. Oh wells.
+              #
+              # @param [IO, #read] fp the IO source to read compressed data from
+              # @param [Integer] size the amount of uncompressed data to expect
+              # @return [String] the uncompressed data
+              def read_data(fp, size)
+                result = ""
+                z = Zlib::Inflate.new
+                start = fp.tell
+                while result.size < size && !z.stream_end?
+                  result += z.inflate(fp.read(1))
+                end
+                # final bytes... can't predict this yet though it's usually 5 bytes
+                while !fp.eof?
+                  begin
+                    result += z.finish
+                    break
+                  rescue Zlib::BufError
+                    result += z.inflate(fp.read(1))
+                  end
+                end
+                z.close
+                result
+              end
+            end
+            ##
+            # Calculates the hash of this particular entry. We need to reconstruct the loose object
+            # header to do this.
+            def calculate_hash!
+              prefix = PREFIX_NAME_LOOKUP[self.type]
+              # add special cases for refs
+              self.hash_id = StringUtils.sha1("#{prefix} #{self.size}\0#{self.content}").digest
+              self.hash_id.force_encoding("ASCII-8BIT") if RUBY_VERSION >= "1.9"
+            end
+            ##
+            # Converts to an actual raw object.
+            #
+            # @param [Support::RootedOpener] an opener in case this object references other things....
+            #   should usually be set
+            # @return [RawObject] this entry in raw object form
+            def to_raw_object(opener = nil)
+              RawObject.construct(hash_id, opener, PREFIX_NAME_LOOKUP[type], content)
+            end
+          end
+          attr_reader :index, :version, :size, :name
+          OBJ_COMMIT = 1
+          OBJ_TREE   = 2
+          OBJ_BLOB   = 3
+          OBJ_TAG    = 4
+          OBJ_OFS_DELTA = 6
+          OBJ_REF_DELTA = 7
+          DATA_START_OFFSET = 12
+          PREFIX_NAME_LOOKUP = {OBJ_COMMIT => 'commit', OBJ_TREE => 'tree', OBJ_BLOB => 'blob', OBJ_TAG => 'tag'}
+          ##
+          # Initializes a PackFile. Parses the header for some information but that's about it. It will
+          # however determine if there is an index file, and if so, it will load that for
+          # fast lookups later. It also verifies the fourcc of the packfile.
+          #
+          # @param [String] name the name of the packfile. This is relative to the directory it's in.
+          # @param [Support::RootedOpener] opener an opener that should be relative to the .git directory.
+          def initialize(name, opener)
+            @name = name
+            @opener = opener
+            opener.open(name, "r") do |fp|
+              # Check signature
+              unless fp.read(4) == "PACK"
+                raise ArgumentError.new("#{name} is not a packfile.")
+              end
+              @version = fp.read(4).unpack("N").first
+              @size    = fp.read(4).unpack("N").first
+              cur = fp.tell
+              fp.seek(0, IO::SEEK_END)
+              @end_of_data = fp.tell - 20
+            end
+            possible_index_path = name[0..(name.size - File.extname(name).size - 1)] + ".idx"
+            if File.exist? possible_index_path
+              # use a persistent file pointer
+              fp = File.open(possible_index_path, "r")
+              @index = PackFileIndex.parse(fp)
+            end
+            @offset_cache = {}
+          end
+          def cached_offset(given_hash)
+            @offset_cache[given_hash]
+          end
+          def cache_entry(entry)
+            @offset_cache[entry.hash_id] = entry.offset
+          end
+          ##
+          # Gets an object in the Git system with the provided SHA1 hash identifier.
+          # If this packfile has an associated index file, that will be used. Otherwise,
+          # the packfile can be scanned from the beginning to the end, caching offsets as
+          # it goes, enabling easy lookup later. Either way, a RawObject or a subclass of it
+          # will be returned, or nil if no matching object is found.
+          #
+          # @param [String] given_hash the SHA-1 of the desired object
+          # @return [RawObject] the object with the given ID. Nil if the object is not in the
+          #   packfile.
+          def object_for_hash(given_hash)
+            @opener.open(name, "r") do |fp|
+              given_hash.force_encoding("ASCII-8BIT") if RUBY_VERSION >= "1.9"
+              entry = nil
+              if index
+                starting_at = index.offset_for_hash(given_hash)
+                return PackFileEntry.at(starting_at, fp).to_raw_object
+              else
+                starting_at = cached_offset(given_hash) || DATA_START_OFFSET
+                fp.seek(starting_at, IO::SEEK_SET)
+                while fp.tell < @end_of_data
+                  entry = PackFileEntry.read(fp)
+                  cache_entry(entry)
+                  return entry.to_raw_object if entry.hash_id == given_hash
+                end
+              end
+            end
+            nil
+          end
+        end
+      end
+    end
+  end
+end