RubyGems - vcs2json - Versions diffs - 1.0.1 → 2.0.0 - Mend

vcs2json 1.0.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

checksums.yaml +4 -4
data/.gitignore +1 -0
data/lib/cli/main.rb +28 -9
data/lib/exceptions/unsupported_language.rb +4 -0
data/lib/srcML/srcml.rb +262 -0
data/lib/vcs2json/git.rb +217 -205
data/lib/vcs2json/logger.rb +43 -0
data/lib/vcs2json/version.rb +1 -1
data/lib/vcs2json_helper.rb +3 -0
data/vcs2json.gemspec +1 -0
metadata +19 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 13238e079776d7f77e6f301c9778f1cd93383e7c
-  data.tar.gz: ad0e55fbe2d9b7bba56d343fb51c8e2a4d08ed6c
+  metadata.gz: c3dfd8c30d50ef1760dd38b36725e30be0b09d9f
+  data.tar.gz: 842566ac9e598784693c76cc60c8a3531ca8b218
 SHA512:
-  metadata.gz: 77ad00f756a282c3b97fd471f1628bf78048f9fa2416ac0ae5b09e8ea430706d1edd67a97f2e19c7d6a88152446b5acde560135076d8c399b4d62386be083c95
-  data.tar.gz: 0ecea201b2eecbeae823c4a3fb3d31e37de37a07cd5b4d602efc7bd631f33c8bccf58e18e303e372a64641c28088e57d03bac1f3fcb8f3aeaa53c50ea926564b
+  metadata.gz: 74e7b098604e6437ef3df2a51af00e013c84c790b47502352d2c1bb8c251892c71d22fb65035a6dbd1217e4d27499dd8abd5d2c50f4668d0fe0dfed407ff4725
+  data.tar.gz: d53aa615168906592464caa112cf55ac0ff8f36a0a7274c3b98317e2c898588142034564f683d0f7a0670b53c78aa947eecbe5d7ea44d8a820702a758c3204de

data/.gitignore CHANGED Viewed

@@ -1,3 +1,4 @@
+*.DS_Store
 /.bundle/
 /.yardoc
 /Gemfile.lock

data/lib/cli/main.rb CHANGED Viewed

@@ -4,20 +4,39 @@ module Vcs2JsonCLI
   class Main < Thor
       map %w[--version -v] => :__print_version
+      class_option :logger_level, type: :string, default: 'error', desc: "The severity level to output to log"
+      class_option :logger_location, type: :string, desc: "Which file to print logs to"
       desc "--version, -v", "print the version"
       def __print_version
           puts Vcs2Json::VERSION
       end
-      method_option :ignore, type: :string, desc: "Specify location of .evocignore file"
-      method_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
-      method_option :issue, :aliases => '-i', :type => :boolean, :default => false, :desc => "Attempt to extract issue ids from commit messages"
-      method_option :after, :aliases => '-a', :desc => "Only include commits after this date"
-      method_option :before, :aliases => '-b', :desc => "Only include commits before this date"
-      method_option :number, :aliases => '-n', type: :numeric, default: 10000, :desc => "The number of commits to dump"
+      ##
+      # default thor behavior is to return exit 0 on errors (i.e., success..)
+      # by having exit_on_failure return true, exit(1) is returned instead
+      def self.exit_on_failure?
+        true
+      end
+      class_option :ignore, type: :string, desc: "Specify location of .evocignore file"
+      class_option :case_id, type: :string, desc: "Specify case identifier. Used by .evocignore etc"
+      class_option :issue, :aliases => '-i', :type => :boolean, :default => false, :desc => "Attempt to extract issue ids from commit messages"
+      class_option :after, :aliases => '-a', :desc => "Only include commits after this date"
+      class_option :before, :aliases => '-b', :desc => "Only include commits before this date"
+      class_option :number, :aliases => '-n', type: :numeric, default: 10000, :desc => "The number of commits to dump"
+      class_option :fine_grained, type: :boolean, default: true, desc: "Include fine grained change information in output"
+      class_option :ignore_comments, type: :boolean, default: false, desc: "Ignore comments when calculating diffs. Only in effect for fine grained changes."
+      class_option :ignore_whitespace, type: :boolean, default: false, desc: "Ignore whitespace when calculating diffs. Only in effect for fine grained changes."
+      class_option :residuals, type: :boolean, default: true, desc: "Consider changes that happen outside of methods"
       desc "git [options]","Make a dump of the change-history of system using git, output on stdout"
-    def git
-          Vcs2Json::Git.new(options).execute
-    end
+      def git
+          Vcs2Json::Git.new(options).parse
+      end
+      # the default is to use git
+      default_task :git
   end
 end

data/lib/exceptions/unsupported_language.rb ADDED Viewed

@@ -0,0 +1,4 @@
+module SrcML
+    class UnsupportedLanguageError < StandardError
+    end
+end

data/lib/srcML/srcml.rb ADDED Viewed

@@ -0,0 +1,262 @@
+# Wrapper for the srcML commandline interface,
+# with functions specifically directed at extracting method names and calculating diffs between files.
+module SrcML
+  extend Logging
+  # Hash of supported languages.
+  #
+  # The file extension must map to a known format for srcML
+  LANGUAGES = {'.java' => 'Java',
+    '.C'    => 'C++',
+    '.cc'   => 'C++',
+    '.cpp'  => 'C++',
+    '.CPP'  => 'C++',
+    '.c++'  => 'C++',
+    '.cp'   => 'C++',
+    '.c'    => 'C'}
+  # Check that SrcML is available
+  if system("srcml", '--version')
+    SRCML = "srcml"
+  else
+    $stderr.puts "SrcML is required, please install from www.srcml.com"
+    exit
+  end
+  ##
+  # PUBLIC INTERFACE
+  ##
+  ###########
+  # OPTIONS #
+  ###########
+  # Whether to remove comments from the source
+  @@ignore_comments   = false
+  # Whether to remove whitespace from the source
+  @@ignore_whitespace = false
+  # Whether to qualify files with their full path or just their basename
+  # i.e., /lib/file.a or just file.a
+  @@basename_qualify  = false
+  # Whether to consider changes that happen outside of methods
+  @@residuals         = false
+  def self.ignore_comments= bool
+    unless [true, false].include?(bool) then raise ArgumentError, "Must be boolean" end
+    @@ignore_comments = bool
+  end
+  def self.ignore_comments?
+    @@ignore_comments
+  end
+  def self.ignore_whitespace= bool
+    unless [true, false].include?(bool) then raise ArgumentError, "Must be boolean" end
+    @@ignore_whitespace = bool
+  end
+  def self.ignore_whitespace?
+    @@ignore_whitespace
+  end
+  def self.basename_qualify= bool
+    unless [true, false].include?(bool) then raise ArgumentError, "Must be boolean" end
+    @@basename_qualify = bool
+  end
+  def self.basename_qualify?
+    @@basename_qualify
+  end
+  def self.residuals= bool
+    unless [true, false].include?(bool) then raise ArgumentError, "Must be boolean" end
+    @@residuals = bool
+  end
+  def self.residuals?
+    @@residuals
+  end
+  ###########
+  # METHODS #
+  ###########
+  ##
+  # Calculates the AST of the given file
+  #
+  # @param [String] path the path to the file
+  # @param [String] rev if specified, retrieves the file from the given revision
+  #
+  # @return [Nokogiri::XML::Document] an AST representation
+  def self.ast(path,revision: FALSE)
+    # get the file content
+    ast = ''
+    if revision
+      # explicitly call bash to get support for process substitution
+      if language = LANGUAGES[File.extname(path)]
+        ast,e,s = Open3.capture3("bash -c '#{SRCML} --language #{language} <(git show #{revision}:#{path})'")
+        if !s.success?
+          raise ArgumentError, e
+        end
+      else
+        raise SrcML::UnsupportedLanguageError, "Language in the file '#{path}' not supported (guessed language from file type)"
+      end
+    else
+      ast,e,s = Open3.capture3("#{SRCML} #{path}")
+      if !s.success?
+        raise ArgumentError, e
+      end
+    end
+    # turn into structured xml
+    xml = Nokogiri::XML(ast)
+    if ignore_comments?
+      # remove all comments
+      xml.search('comment').each do |c|
+        # trailing newline + any number of spaces are removed from the previous node
+        # this gives a more intuitive behaviour
+        # i.e., the newline + spaces before the comment is considered "part of" the comment
+        if previous_node = c.previous_sibling
+          previous_node.content = previous_node.content.gsub(/\n(\s)*/,"")
+        end
+        # now remove the comment
+        c.remove
+      end
+    end
+    if ignore_whitespace?
+      # remove all new lines
+      xml.search("text()").each do |node|
+        if node.content =~ /\S/
+          node.content = node.content.gsub(/[[:space:]]([[:space:]])*/,"")
+        else
+          node.remove
+        end
+      end
+    end
+    return xml
+  end
+  ##
+  # Returns the methods of the given file
+  #
+  # If the method has any parameters, the parameter types are also returned with the method name
+  #
+  # @param [String] path the path to the file
+  # @param [String] rev if specified, retrieves the file from the given revision
+  # @return [Hash[method_name => method_hash]] a hash storing the methods as keys and the hashed method as values
+  def self.methods(path,revision: FALSE)
+    ast = ast(path,revision: revision)
+    # hash each method and store in hash map with function name as key
+    methods = Hash.new
+    qualified_file = path
+    if basename_qualify?
+      qualified_file = File.basename(path)
+    end
+    # split file based on class declarations
+    partitions = ast.search("class")
+    if partitions.empty?
+      # no classes, just use the full ast
+      partitions = [ast]
+    end
+    partitions.each do |partition|
+      # if partitioned into classes, attempt to extract class name
+      class_name = ''
+      if !partition.document? & name = partition.at_css("/name")
+        class_name = name.text
+      else
+        logger.debug "(#{qualified_file}) Found partitioned file but could not find classname for this partition at location \\name'. Context:\n###\n#{partition}\n###"
+      end
+      partition.search("function").each do |function|
+        if name = function.at_css("/name")
+          # attempt to extract parameters
+          parameters = []
+          if parameter_list = function.at_css("/parameter_list")
+            parameter_list.search("parameter").each do |p|
+              if parameter = (p.at_css("decl type name name") or p.at_css("decl type name") or p.at_css("decl type") or p.at_css("type") or p.at_css("name"))
+                parameters << parameter.text
+              else
+                logger.debug "(#{qualified_file}) Function: #{name}. Nested structures: (decl type name name) or (decl type name) or (decl type) not found in parameter xml, ignoring this parameter. Context:\n###\n#{p}\n###"
+              end
+            end
+          else
+            logger.debug "(#{qualified_file}) Parameter list not found for Function: #{name}. Searched for structure '/parameter_list'. Context:\n###\n#{function}\n###"
+          end
+          method_name = parameters.empty? ? name.text : name.text+"("+parameters.join(',')+")"
+          fully_qualified_name = class_name.empty? ? [qualified_file,method_name].join(':') : [qualified_file,class_name,method_name].join(':')
+          if block = function.at_css("block")
+            methods[fully_qualified_name] = block.content.hash
+          else
+            logger.debug "(#{qualified_file}) No <block> (i.e. the function content) in the function xml. Function: #{name}."
+          end
+        else
+          logger.debug "(#{qualified_file}) Could not identify function name at location '\\name'. Context:\n###\n#{p}\n###"
+        end
+      end
+    end
+    if residuals?
+      # add residuals entry
+      # i.e., whats left of the code when all methods are removed
+      ast.search("function").remove
+      methods[qualified_file+':'+'@residuals'] = ast.content.hash
+    end
+    return methods
+  end
+  ##
+  # Given two Hashes, returns all the keys that either have different values in the two hashes
+  # or are not in both hashes.
+  #
+  # @param: [Hash] old
+  # @param: [Hash] new
+  # @return [Array<String>]
+  def self.different_entries(old,new)
+    different = []
+    new.each do |k,v|
+      # new keys
+      if !old.key?(k)
+        #    puts "KEY NOT IN OLD: #{k}"
+        different << k
+        # different values for same key
+      elsif v != old[k]
+        #    puts "DIFFERENT VALUES SAME KEY\nOLD WAS:\n--\n#{old[k].split(//)}\n--\nNEW WAS:\n--\n#{v.split(//)}\n--"
+        different << k
+      end
+    end
+    # keys that are only in old
+    deleted_keys = old.keys - new.keys
+    if !deleted_keys.empty?
+      # puts "KEY NOT IN NEW: #{deleted_keys}"
+      different.concat(deleted_keys)
+    end
+    return different
+  end
+  ##
+  # Calculate the changed methods of the file specified by revision and path
+  #
+  # @param [String] old the path to the old file
+  # @param [String] new the path to the new file
+  # @return [Array<String>] the changed methods
+  def self.changed_methods(old,new)
+    methods_old = methods(old)
+    methods_new = methods(new)
+    return different_entries(methods_old,methods_new)
+  end
+  ##
+  # Like #changed_methods but retrieves the file from a git revision
+  # Calculate the changed methods of the file specified by revision and path
+  #
+  # @param [String] path the path to the file
+  # @param [String] revision the revision to retrieve the file from
+  # @return [Array<String>] the changed methods
+  def self.changed_methods_git(path,revision)
+    methods_new = methods(path, revision: revision)
+    methods_old = methods(path, revision: revision+'~1')
+    return different_entries(methods_old,methods_new)
+  end
+end

data/lib/vcs2json/git.rb CHANGED Viewed

@@ -1,230 +1,242 @@
 require_relative '../vcs2json_helper'
 module Vcs2Json
-    class Git
-        # Generate separators between fields and commits
-        FIELD_SEP  = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
-        COMMIT_SEP = Digest::SHA256.hexdigest Time.new.to_s + "commit_sep"
-        def initialize(opts)
-            @opts = opts
-            self.ignore = @opts[:ignore]
-            # Create a commit hash that defaults to creating new hashes given hash[:key]
-            # so we can do 'commit[:commit][:author][:name] = .. ' without creating the :commit and :author hashes first
-            @commits = Hash.new {|h,k| h[k] = Hash.new(&h.default_proc) }
-            # place to stare empty commit ids if they are encountered
-            @empty_commits = []
-            # used to decide if we should try to search for more commits
-            @oldest_commit_in_previous_search = Time.now
-        end
-        def execute
-            # recursively add commits as long as we have less than :number and there are still more commits to search
-            begin
-                add_commits(@opts)
-                add_integer_mapping
+  class Git
+    include Logging
+    attr_accessor :number, :fine_grained, :case_id
+    attr_reader :ignore
+    # Generate separators between fields and commits
+    FIELD_SEP  = Digest::SHA256.hexdigest Time.new.to_s + "field_sep"
+    META_DATA = "%H#{FIELD_SEP}"\
+      "%an#{FIELD_SEP}"\
+      "%ae#{FIELD_SEP}"\
+      "%ad#{FIELD_SEP}"\
+      "%cn#{FIELD_SEP}"\
+      "%ce#{FIELD_SEP}"\
+      "%cd#{FIELD_SEP}"\
+      "%B"
+    def initialize(opts)
+      self.ignore = opts[:ignore]
+      self.before = opts[:before]
+      self.after = opts[:after]
+      self.number = opts[:number]
+      self.fine_grained = opts[:fine_grained]
+      # Set logger level
+      Logging.set_location(opts[:logger_location])
+      Logging.set_level(opts[:logger_level])
+      SrcML.ignore_comments = opts[:ignore_comments]
+      SrcML.ignore_whitespace = opts[:ignore_whitespace]
+      SrcML.residuals = opts[:residuals]
+    end
-                # sort on date and prune excessive commits
-                sorted_and_pruned = @commits.sort_by {|id,commit| commit[:date]}.reverse.map {|(_,commit)| commit}.first(@opts[:number])
+    def after=(after)
+      if !after.nil?
+        begin
+          Date.parse(after)
+          @after = after
+        rescue
+          STDERR.puts "Invalid date --after=#{after}. Ignoring option."
+          @after = nil
+        end
+      end
+    end
-                # print commits to stdout as json
-                $stdout.puts JSON.pretty_generate(sorted_and_pruned)
+    def after
+      @after.nil? ? '' : "--after=\"#{@after}\""
+    end
-                # print ids of empty commits to stderr
-                if !@empty_commits.empty?
-                    STDERR.puts "EMPTY COMMITS"
-                    STDERR.puts @empty_commits
-                end
-                # print additional info to stderr
-                STDERR.puts "\n\nExtracted #{sorted_and_pruned.size} commits."
-            rescue EncodingError => e
-                puts e
-            end
+    def before=(before)
+      if !before.nil?
+        begin
+          Date.parse(before)
+          @before = before
+        rescue
+          STDERR.puts "Invalid date --before=#{before}. Ignoring option."
+          @before = nil
         end
+      end
+    end
-        def ignore
-            @ignore
-        end
+    def before
+      @before.nil? ? '' : "--before=\"#{@before}\""
+    end
-        def ignore= path
-            default_locations = ["#{Dir.pwd}/.evocignore","~/.evocignore"]
-            paths = (path.nil? ? default_locations : [path] + default_locations)
-            file = nil
-            ignore = []
-            paths.each do |p|
-                if File.exist?(p)
-                    file = File.open(p)
-                    STDERR.puts "Loading files to ignore from #{file.path}"
-                    # return first match
-                    break
-                end
-            end
-            if file.nil?
-                STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
-            else
-                if @opts[:case_id].nil?
-                    STDERR.puts "Id in .evocignore not specified, not ignoring any files."
-                else
-                    ignore_file = YAML.load(file)
-                    if ignore_file.key?(@opts[:case_id])
-                        ignore = ignore_file[@opts[:case_id]]
-                        if !ignore.nil?
-                            STDERR.puts "Ignoring #{ignore.size} files"
-                        end
-                    else
-                        STDERR.puts "The id: '#{@opts[:case_id]}' not found in #{file.path}"
-                    end
-                end
-            end
-            @ignore = (ignore.nil? ? [] : ignore)
-            return @ignore
-        end
-        private
-        def add_commits(opts)
-            add_meta_information(opts)
-            add_change_information(opts)
-            if @commits.size < @opts[:number]
-                oldest_commit_in_this_search = get_oldest_commit
-                if oldest_commit_in_this_search != @oldest_commit_in_previous_search
-                    # we found new commits in this search but still need more
-                    @oldest_commit_in_previous_search = oldest_commit_in_this_search
-                    add_commits(before: oldest_commit_in_this_search, number: (@opts[:number] - @commits.size)*2)
-                else
-                    STDERR.puts "\nAsked for #{@opts[:number]} commits, only found #{@commits.size} non-empty commits. Searched all the way back to #{oldest_commit_in_this_search}."
-                end
+    def parse
+      # keeps track of number of commits successfully parsed
+      commit_counter = 0
+      # keeps track of empty commits
+      empty_commits = []
+      ##########################
+      # GET LIST OF COMMIT IDS #
+      ##########################
+      # getting the list of revision ids is cheap, so we get some extra in case we are unable to parse the required amount in the first 'n' commits
+      commit_ids = `git rev-list HEAD #{self.before} #{self.after} -n #{self.number*10} --no-merges`.split
+      ############################
+      # ITERATE OVER EACH COMMIT #
+      ############################
+      commit_ids.each do |id|
+        logger.debug "Parsing commit: #{id}"
+        # get the changed files
+        changed_files = `git log --pretty=format:'' --name-status #{id} -n 1`.split("\n")
+        # removed ignored files
+        changed_files.reject! {|i| self.ignore.include?(i)}
+        # add files changed info
+        if !changed_files.empty?
+          ##################
+          # FETCH METADATA #
+          ##################
+          raw_commit = `git log --pretty=format:'#{META_DATA}' #{id} -n 1`
+          commit = ''
+          ##################
+          # CLEAN RAW DATA #
+          ##################
+          begin
+            # try encoding to utf8
+            commit = raw_commit.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
+            # need to expliceitely check if the encoding is valid for ruby <= 2.0
+            # utf8 -> utf8 will not do anything even with invalid bytes
+            # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
+            if !commit.valid_encoding?
+              # encode to utf16 first and then back to utf8
+              commit.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
+              commit.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
             end
-        end
-        def get_oldest_commit
-            oldest = nil
-            if !@commits.empty?
-                oldest = @commits.first[1][:date]
-                @commits.each do |sha,info|
-                    if info[:date] < oldest
-                        oldest = info[:date]
-                    end
+          rescue ArgumentError
+            raise EncodingError.new, "Unable to encode input as UTF-8"
+          end
+          ##############################
+          # CONSTRUCT OUTPUT HASH/JSON #
+          ##############################
+          output_hash = Hash.new
+          fields = commit.split(FIELD_SEP)
+          sha = fields[0].delete("\n") #remove astray newlines
+          output_hash[:sha]            = sha
+          output_hash[:name]           = fields[1]
+          output_hash[:email]          = fields[2]
+          output_hash[:date]           = Time.parse fields[3]
+          output_hash[:committer_name] = fields[4]
+          output_hash[:committer_email]= fields[5]
+          output_hash[:committer_date] = Time.parse fields[6]
+          output_hash[:message]        = fields[7]
+          output_hash[:changes] = []
+          #######################################
+          # PARSE FILES FOR FINEGRAINED CHANGES #
+          #######################################
+          # print progress
+          changed_files.each_with_index do |line,index|
+          STDERR.print "Parsing file #{index+1} of #{changed_files.size} in commit #{commit_counter+1} of #{self.number}                  \r"
+            if !line.empty?
+              file_info = line.split("\t")
+              file_name = file_info[1]
+              status = file_info[0]
+              # add finer grained change info
+              if self.fine_grained
+                begin
+                  # new file, all methods are new, no need to calculate diff
+                  if status == 'A'
+                    SrcML.methods(file_name,revision: id).keys.each {|m| output_hash[:changes] << m}
+                    # calculate diffs
+                  else
+                    SrcML.changed_methods_git(file_name,id).each {|m| output_hash[:changes] << m}
+                  end
+                rescue SrcML::UnsupportedLanguageError
+                  output_hash[:changes] << file_name
                 end
+              else
+                output_hash[:changes] << file_name
+              end
             end
-            return oldest
-        end
+          end # changes_files.each
-        def hash_2_gitoptions(opts)
-            before = opts[:before].nil? ? '' : "--before=\"#{opts[:before]}\""
-            after = opts[:after].nil?   ? '' : "--after=\"#{opts[:after]}\""
-            number = opts[:number].nil? ? '' : "-n #{opts[:number]}"
-            return "#{before} #{after} #{number} --no-merges"
-        end
+          # Only add commits where at least on changes was detected
+          if !output_hash[:changes].empty?
+            ###########################
+            # PRINT COMMIT TO $stdout #
+            ###########################
-        def add_meta_information(opts)
-            raw_commits = `git log #{hash_2_gitoptions(opts)} --pretty=format:'%H#{FIELD_SEP}%cn#{FIELD_SEP}%ce#{FIELD_SEP}%cd#{FIELD_SEP}%ad#{FIELD_SEP}%B#{COMMIT_SEP}'`
-            begin
-                encoded = ''
-                # try encoding to utf8
-                encoded = raw_commits.encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
-                # need to expliceitely check if the encoding is valid for ruby <= 2.0
-                # utf8 -> utf8 will not do anything even with invalid bytes
-                # http://stackoverflow.com/questions/24036821/ruby-2-0-0-stringmatch-argumenterror-invalid-byte-sequence-in-utf-8
-                if !encoded.valid_encoding?
-                    # encode to utf16 first and then back to utf8
-                    encoded.encode!("UTF-16be", invalid: :replace, undef: :replace, :replace=>'')
-                    encoded.encode!('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
-                end
-                # split into individual commits
-                commits_info = encoded.split(COMMIT_SEP)
-            rescue ArgumentError
-                raise EncodingError.new, "Unable to encode input as UTF-8"
-            end
+            $stdout.puts output_hash.to_json
-            commits_info.each do |commit|
-                fields = commit.split(FIELD_SEP)
-                sha = fields[0].delete("\n") #remove astray newlines
-                @commits[sha][:sha]            = sha
-                @commits[sha][:name]           = fields[1]
-                @commits[sha][:email]          = fields[2]
-                @commits[sha][:date]           = Time.parse fields[3]
-                @commits[sha][:author_date]    = Time.parse fields[4]
-                @commits[sha][:message]        = fields[5]
-                # attempt to parse an issue id from the commit message
-                if @opts[:issue]
-                    @commits[commit[0]][:issue] = parse_issue(@commits[sha][:message])
-                end
-            end
-        end
+            # increase counter for number of commits successfully parsed
+            commit_counter += 1
-        def add_change_information(opts)
-            commits_changes_type = `git log --pretty=format:'#{FIELD_SEP}%H' --name-status #{hash_2_gitoptions(opts)}`.split(FIELD_SEP)
-            commits_changes_type.each do |commit|
-                if !commit.empty?
-                    lines = commit.split("\n")
-                    sha = lines[0]
-                    @commits[sha][:changes][:all] = []
-                    if lines.size > 1
-                        lines[1..-1].each do |line|
-                            if !line.empty?
-                                file_info = line.split("\t")
-                                file_name = file_info[1]
-                                status = file_info[0]
-                                @commits[sha][:changes][:all] << file_name
-                                @commits[sha][:changes][:details][file_name][:filename] = file_name
-                                @commits[sha][:changes][:details][file_name][:status] = parse_status(status)
-                            end
-                        end
-                    end
-                    # filter out ignored files
-                    if !self.ignore.nil?
-                        @commits[sha][:changes][:all].reject! {|i| self.ignore.include?(i)}
-                    end
-                    if @commits[sha][:changes][:all].empty?
-                        @empty_commits << sha
-                        @commits.delete(sha)
-                    end
-                end
-            end
-        end
+            ########################################
+            # CHECK IF REQUESTED AMOUNT IS REACHED #
+            ########################################
-        def add_integer_mapping
-            # create file_name -> integer mapping
-            mapping = Hash.new
-            index_counter = 0
-            @commits.each do |sha,info|
-                integer_representation = []
-                info[:changes][:all].each do |file|
-                    if mapping[file].nil?
-                        mapping[file] = index_counter
-                        index_counter += 1
-                    end
-                    integer_representation << mapping[file]
-                    info[:changes][:details][file][:id] = mapping[file]
-                end
-                info[:changes][:all].clear
-                info[:changes][:all] = integer_representation
+            if commit_counter == self.number
+              break # out of loop
             end
+          else # no changes detected in commit
+            empty_commits << id
+          end
+        else # no files in commit
+          empty_commits << id
         end
+      end
+      # we may still lack commits after exhaustive search, notify user
+      if commit_counter < self.number
+        STDERR.puts "Asked for #{self.number} commits, only found #{commit_counter} non-empty commits in the last #{self.number*2} commits"
+      end
+      # print ids of empty commits to stderr
+      if !empty_commits.empty?
+        STDERR.puts "EMPTY COMMITS"
+        STDERR.puts empty_commits
+      end
+    end
-        # simply un-abbreviates the status code given by --name-status
-        def parse_status(abbreviated_status)
-            case abbreviated_status
-            when "A"
-                "added"
-            when "M"
-                "modified"
-            when "D"
-                "deleted"
-            end
+    def ignore= path
+      default_locations = ["#{Dir.pwd}/.evocignore","~/.evocignore"]
+      paths = (path.nil? ? default_locations : [path] + default_locations)
+      file = nil
+      ignore = []
+      paths.each do |p|
+        if File.exist?(p)
+          file = File.open(p)
+          STDERR.puts "Loading files to ignore from #{file.path}"
+          # return first match
+          break
         end
-        # attempts to parse an issue/bug id from the given commit message
-        def parse_issue(message)
-            if match = /(bug|issue) (?<id>\d+)/i.match(message)
-                return match[:id]
-            else
-                return ""
+      end
+      if file.nil?
+        STDERR.puts ".evocignore not found. Tried #{paths}. All files will be used."
+      else
+        if self.case_id.nil?
+          STDERR.puts "Id in .evocignore not specified, not ignoring any files."
+        else
+          ignore_file = YAML.load(file)
+          if ignore_file.key?(self.case_id)
+            ignore = ignore_file[self.case_id]
+            if !ignore.nil?
+              STDERR.puts "Ignoring #{ignore.size} files"
             end
+          else
+            STDERR.puts "The id: '#{self.case_id}' not found in #{file.path}"
+          end
         end
+      end
+      @ignore = (ignore.nil? ? [] : ignore)
+      return @ignore
     end
+  end
 end

data/lib/vcs2json/logger.rb ADDED Viewed

@@ -0,0 +1,43 @@
+# enable logging in classes through 'include Logging'
+module Logging
+  def logger
+    @logger ||= Logging.logger_for(self.class.name)
+  end
+  # Use a hash class-ivar to cache a unique Logger per class:
+  @loggers = {}
+  @logger_level = 'debug'
+  @logger_location = 'vcs2json.log'
+  class << self
+    def logger_for(classname)
+      @loggers[classname] ||= configure_logger_for(classname)
+    end
+    def configure_logger_for(classname)
+      logger = Logger.new(@logger_location,'daily')
+      logger.progname = classname
+      logger.level = const_get('Logger::'+@logger_level.upcase)
+      logger
+    end
+    def set_location(path)
+      @logger_location = path
+    end
+    def set_level(level)
+      possible_levels = %w(debug info warn error info)
+      if !level.nil?
+        if !level.empty?
+          if possible_levels.include?(level)
+            STDERR.puts "Logging level has been set to '#{level}' for output to #{@logger_location}"
+            @loggers.each {|l| l.level = const_get('Logger::'+level.upcase)}
+            @logger_level = level
+          else
+            STDERR.puts "Unable to set logger level to #{level}, possible values are #{possible_levels}. Defaulting to 'info'."
+          end
+        end
+      end
+    end
+  end
+end

data/lib/vcs2json/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Vcs2Json
-  VERSION = "1.0.1"
+  VERSION = "2.0.0"
 end

data/lib/vcs2json_helper.rb CHANGED Viewed

@@ -4,5 +4,8 @@ require 'json/pure'
 require 'time'
 require 'csv'
 require 'chronic'
+require 'logger'      # leveled logging
+require 'nokogiri'    # better/faster xml library
+require 'open3'       # make system calls and capture stdout/stderr/exitcodes easily
 require 'require_all'
 require_rel '/**/*.rb'

data/vcs2json.gemspec CHANGED Viewed

@@ -25,4 +25,5 @@ Gem::Specification.new do |spec|
   spec.add_runtime_dependency "require_all"
   spec.add_runtime_dependency "json_pure"
   spec.add_runtime_dependency "chronic"
+  spec.add_runtime_dependency "nokogiri"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: vcs2json
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 2.0.0
 platform: ruby
 authors:
 - Thomas Rolfsnes
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-06-10 00:00:00.000000000 Z
+date: 2016-09-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -108,6 +108,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email:
 - mail@thomasrolfsnes.com
@@ -136,7 +150,10 @@ files:
 - lib/exceptions/no_date_field.rb
 - lib/exceptions/no_file_section.rb
 - lib/exceptions/no_time_data_in_chafiles_field.rb
+- lib/exceptions/unsupported_language.rb
+- lib/srcML/srcml.rb
 - lib/vcs2json/git.rb
+- lib/vcs2json/logger.rb
 - lib/vcs2json/version.rb
 - lib/vcs2json_helper.rb
 - vcs2json.gemspec