RubyGems - daru - Versions diffs - 0.1.5 → 0.3 - Mend

daru 0.1.5 → 0.3

Files changed (106) hide show

checksums.yaml +5 -5
data/.github/ISSUE_TEMPLATE.md +18 -0
data/.gitignore +1 -0
data/.rubocop.yml +21 -7
data/.travis.yml +10 -5
data/CONTRIBUTING.md +15 -10
data/History.md +124 -2
data/README.md +37 -9
data/ReleasePolicy.md +20 -0
data/benchmarks/db_loading.rb +34 -0
data/benchmarks/statistics.rb +6 -6
data/benchmarks/where_clause.rb +1 -1
data/benchmarks/where_vs_filter.rb +1 -1
data/daru.gemspec +17 -41
data/lib/daru.rb +10 -13
data/lib/daru/accessors/gsl_wrapper.rb +1 -1
data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
data/lib/daru/category.rb +29 -15
data/lib/daru/configuration.rb +34 -0
data/lib/daru/core/group_by.rb +158 -77
data/lib/daru/core/merge.rb +12 -3
data/lib/daru/core/query.rb +20 -4
data/lib/daru/dataframe.rb +692 -118
data/lib/daru/date_time/index.rb +14 -11
data/lib/daru/date_time/offsets.rb +9 -1
data/lib/daru/extensions/which_dsl.rb +55 -0
data/lib/daru/formatters/table.rb +3 -5
data/lib/daru/index/categorical_index.rb +4 -4
data/lib/daru/index/index.rb +131 -42
data/lib/daru/index/multi_index.rb +118 -10
data/lib/daru/io/csv/converters.rb +21 -0
data/lib/daru/io/io.rb +105 -33
data/lib/daru/io/sql_data_source.rb +10 -0
data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
data/lib/daru/iruby/templates/vector.html.erb +3 -25
data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
data/lib/daru/maths/arithmetic/vector.rb +38 -2
data/lib/daru/maths/statistics/dataframe.rb +28 -30
data/lib/daru/maths/statistics/vector.rb +295 -41
data/lib/daru/plotting/gruff/dataframe.rb +13 -15
data/lib/daru/plotting/nyaplot/category.rb +1 -1
data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
data/lib/daru/plotting/nyaplot/vector.rb +1 -2
data/lib/daru/vector.rb +308 -96
data/lib/daru/version.rb +1 -1
data/profile/vector_new.rb +9 -0
data/spec/accessors/gsl_wrapper_spec.rb +38 -35
data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
data/spec/category_spec.rb +24 -20
data/spec/core/group_by_spec.rb +238 -4
data/spec/core/merge_spec.rb +1 -1
data/spec/core/query_spec.rb +65 -50
data/spec/daru_spec.rb +22 -0
data/spec/dataframe_spec.rb +473 -16
data/spec/date_time/date_time_index_helper_spec.rb +72 -0
data/spec/date_time/index_spec.rb +34 -16
data/spec/date_time/offsets_spec.rb +14 -0
data/spec/extensions/rserve_spec.rb +1 -1
data/spec/extensions/which_dsl_spec.rb +38 -0
data/spec/fixtures/boolean_converter_test.csv +5 -0
data/spec/fixtures/duplicates.csv +32 -0
data/spec/fixtures/eciresults.html +394 -0
data/spec/fixtures/empty_rows_test.csv +17 -0
data/spec/fixtures/macau.html +3691 -0
data/spec/fixtures/macd_data.csv +150 -0
data/spec/fixtures/matrix_test.csv +55 -55
data/spec/fixtures/moneycontrol.html +6812 -0
data/spec/fixtures/string_converter_test.csv +5 -0
data/spec/fixtures/test_xls.xls +0 -0
data/spec/fixtures/test_xls_2.xls +0 -0
data/spec/fixtures/url_test.txt~ +0 -0
data/spec/fixtures/valid_markup.html +62 -0
data/spec/fixtures/wiki_climate.html +1243 -0
data/spec/fixtures/wiki_table_info.html +631 -0
data/spec/formatters/table_formatter_spec.rb +29 -0
data/spec/index/categorical_index_spec.rb +33 -33
data/spec/index/index_spec.rb +160 -41
data/spec/index/multi_index_spec.rb +143 -33
data/spec/io/io_spec.rb +246 -2
data/spec/io/sql_data_source_spec.rb +31 -41
data/spec/iruby/dataframe_spec.rb +17 -19
data/spec/iruby/vector_spec.rb +26 -28
data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
data/spec/maths/arithmetic/vector_spec.rb +18 -0
data/spec/maths/statistics/vector_spec.rb +153 -15
data/spec/plotting/gruff/category_spec.rb +3 -3
data/spec/plotting/gruff/dataframe_spec.rb +14 -4
data/spec/plotting/gruff/vector_spec.rb +9 -9
data/spec/plotting/nyaplot/category_spec.rb +5 -9
data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
data/spec/plotting/nyaplot/vector_spec.rb +5 -11
data/spec/shared/vector_display_spec.rb +12 -14
data/spec/spec_helper.rb +30 -7
data/spec/support/matchers.rb +5 -0
data/spec/vector_spec.rb +306 -72
metadata +96 -55
data/spec/fixtures/stock_data.csv +0 -500

@@ -17,17 +17,17 @@ module Daru
         end
       end
-      def initialize left_df, right_df, opts={}
+      def initialize left_df, right_df, opts={} # rubocop:disable Metrics/AbcSize -- quick-fix for issue #171
         init_opts(opts)
         validate_on!(left_df, right_df)
         key_sanitizer = ->(h) { sanitize_merge_keys(h.values_at(*on)) }
         @left = df_to_a(left_df)
-        @left.sort_by!(&key_sanitizer)
+        @left.sort! { |a, b| safe_compare(a.values_at(*on), b.values_at(*on)) }
         @left_key_values = @left.map(&key_sanitizer)
         @right = df_to_a(right_df)
-        @right.sort_by!(&key_sanitizer)
+        @right.sort! { |a, b| safe_compare(a.values_at(*on), b.values_at(*on)) }
         @right_key_values = @right.map(&key_sanitizer)
         @left_keys, @right_keys = merge_keys(left_df, right_df, on)
@@ -246,6 +246,15 @@ module Daru
             raise ArgumentError, "Both dataframes expected to have #{on.inspect} field"
         end
       end
+      def safe_compare(left_array, right_array)
+        left_array.zip(right_array).map { |l, r|
+          next 0 if l.nil? && r.nil?
+          next 1 if r.nil?
+          next -1 if l.nil?
+          l <=> r
+        }.reject(&:zero?).first || 0
+      end
     end
     module Merge

data/lib/daru/core/query.rb CHANGED

@@ -9,13 +9,13 @@ module Daru
         end
         def & other
-          BoolArray.new @barry.zip(other.barry).map { |b, o| b && o }
+          BoolArray.new(@barry.zip(other.barry).map { |b, o| b && o })
         end
         alias :and :&
         def | other
-          BoolArray.new @barry.zip(other.barry).map { |b, o| b || o }
+          BoolArray.new(@barry.zip(other.barry).map { |b, o| b || o })
         end
         alias :or :|
@@ -39,11 +39,11 @@ module Daru
       class << self
         def apply_scalar_operator operator, data, other
-          BoolArray.new data.map { |d| !!d.send(operator, other) }
+          BoolArray.new(data.map { |d| !!d.send(operator, other) if d.respond_to?(operator) })
         end
         def apply_vector_operator operator, vector, other
-          BoolArray.new vector.zip(other).map { |d, o| !!d.send(operator, o) }
+          BoolArray.new(vector.zip(other).map { |d, o| !!d.send(operator, o) })
         end
         def df_where data_frame, bool_array
@@ -70,6 +70,22 @@ module Daru
           resultant_dv
         end
+        def vector_apply_where dv, bool_array
+          _data, new_index = fetch_new_data_and_index dv, bool_array
+          all_index        = dv.index
+          all_data         = all_index.map { |idx| new_index.include?(idx) ? yield(dv[idx]) : dv[idx] }
+          resultant_dv = Daru::Vector.new all_data,
+            index: dv.index.class.new(all_index),
+            dtype: dv.dtype,
+            type: dv.type,
+            name: dv.name
+          # Preserve categories order for category vector
+          resultant_dv.categories = dv.categories if dv.category?
+          resultant_dv
+        end
         private
         def fetch_new_data_and_index dv, bool_array

data/lib/daru/dataframe.rb CHANGED

@@ -10,7 +10,10 @@ module Daru
     include Daru::Maths::Arithmetic::DataFrame
     include Daru::Maths::Statistics::DataFrame
     # TODO: Remove this line, but it's causing errors due to an unknown reason
-    include Daru::Plotting::DataFrame::NyaplotLibrary if Daru.has_nyaplot?
+    Daru.has_nyaplot?
+    attr_accessor(*Configuration::INSPECT_OPTIONS_KEYS)
     extend Gem::Deprecate
     class << self
@@ -20,7 +23,7 @@ module Daru
       #
       # == Arguments
       #
-      # * path - Path of the file to load specified as a String.
+      # * path - Local path / Remote URL of the file to load specified as a String.
       #
       # == Options
       #
@@ -63,7 +66,7 @@ module Daru
       # Read a database query and returns a Dataset
       #
-      # @param dbh [DBI::DatabaseHandle] A DBI connection to be used to run the query
+      # @param dbh [DBI::DatabaseHandle, String] A DBI connection OR Path to a SQLite3 database.
       # @param query [String] The query to be executed
       #
       # @return A dataframe containing the data resulting from the query
@@ -72,6 +75,11 @@ module Daru
       #
       #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
       #  Daru::DataFrame.from_sql(dbh, "SELECT * FROM test")
+      #
+      #  #Alternatively
+      #
+      #  require 'dbi'
+      #  Daru::DataFrame.from_sql("path/to/sqlite.db", "SELECT * FROM test")
       def from_sql dbh, query
         Daru::IO.from_sql dbh, query
       end
@@ -79,7 +87,7 @@ module Daru
       # Read a dataframe from AR::Relation
       #
       # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
-      # @params fields [Array] Field names to be loaded (optional)
+      # @param fields [Array] Field names to be loaded (optional)
       #
       # @return A dataframe containing the data loaded from the relation
       #
@@ -112,6 +120,49 @@ module Daru
         Daru::IO.from_plaintext path, fields
       end
+      # Read the table data from a remote html file. Please note that this module
+      # works only for static table elements on a HTML page, and won't work in
+      # cases where the data is being loaded into the HTML table by Javascript.
+      #
+      # By default - all <th> tag elements in the first proper row are considered
+      # as the order, and all the <th> tag elements in the first column are
+      # considered as the index.
+      #
+      # == Arguments
+      #
+      # * path [String] - URL of the target HTML file.
+      # * fields [Hash] -
+      #
+      #   +:match+ - A *String* to match and choose a particular table(s) from multiple tables of a HTML page.
+      #
+      #   +:order+ - An *Array* which would act as the user-defined order, to override the parsed *Daru::DataFrame*.
+      #
+      #   +:index+ - An *Array* which would act as the user-defined index, to override the parsed *Daru::DataFrame*.
+      #
+      #   +:name+ - A *String* that manually assigns a name to the scraped *Daru::DataFrame*, for user's preference.
+      #
+      # == Returns
+      # An Array of +Daru::DataFrame+s, with each dataframe corresponding to a
+      # HTML table on that webpage.
+      #
+      # == Usage
+      #   dfs = Daru::DataFrame.from_html("http://www.moneycontrol.com/", match: "Sun Pharma")
+      #   dfs.count
+      #   # => 4
+      #
+      #   dfs.first
+      #   #
+      #   # => <Daru::DataFrame(5x4)>
+      #   #          Company      Price     Change Value (Rs
+      #   #     0 Sun Pharma     502.60     -65.05   2,117.87
+      #   #     1   Reliance    1356.90      19.60     745.10
+      #   #     2 Tech Mahin     379.45     -49.70     650.22
+      #   #     3        ITC     315.85       6.75     621.12
+      #   #     4       HDFC    1598.85      50.95     553.91
+      def from_html path, fields={}
+        Daru::IO.from_html path, fields
+      end
       # Create DataFrame by specifying rows as an Array of Arrays or Array of
       # Daru::Vector objects.
       def rows source, opts={}
@@ -229,6 +280,17 @@ module Daru
     # Default to *true*.
     #
     # == Usage
+    #
+    #   df = Daru::DataFrame.new
+    #   # =>
+    #   # <Daru::DataFrame(0x0)>
+    #   # Creates an empty DataFrame with no rows or columns.
+    #
+    #   df = Daru::DataFrame.new({}, order: [:a, :b])
+    #   #<Daru::DataFrame(0x2)>
+    #     a   b
+    #   # Creates a DataFrame with no rows and columns :a and :b
+    #
     #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
     #     index: [:a, :b, :c, :d], name: :spider_man)
     #
@@ -239,26 +301,67 @@ module Daru
     #   #  b          7          2
     #   #  c          8          3
     #   #  d          9          4
-    def initialize source, opts={} # rubocop:disable Metrics/MethodLength
+    #
+    #   df = Daru::DataFrame.new([[1,2,3,4],[6,7,8,9]], name: :bat_man)
+    #
+    #   # =>
+    #   # #<Daru::DataFrame: bat_man (4x2)>
+    #   #             0          1
+    #   #  0          1          6
+    #   #  1          2          7
+    #   #  2          3          8
+    #   #  3          4          9
+    #
+    #   # Dataframe having Index name
+    #
+    #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
+    #     index: Daru::Index.new([:a, :b, :c, :d], name: 'idx_name'),
+    #     name: :spider_man)
+    #
+    #   # =>
+    #   # <Daru::DataFrame:80766980 @name = spider_man @size = 4>
+    #   # idx_name            b          a
+    #   #        a          6          1
+    #   #        b          7          2
+    #   #        c          8          3
+    #   #        d          9          4
+    #
+    #
+    #   idx = Daru::Index.new [100, 99, 101, 1, 2], name: "s1"
+    #   => #<Daru::Index(5): s1 {100, 99, 101, 1, 2}>
+    #
+    #   df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
+    #     c: [11,22,33,44,55]},
+    #     order: [:a, :b, :c],
+    #     index: idx)
+    #    # =>
+    #    #<Daru::DataFrame(5x3)>
+    #    #   s1   a   b   c
+    #    #  100   1  11  11
+    #    #   99   2  12  22
+    #    #  101   3  13  33
+    #    #    1   4  14  44
+    #    #    2   5  15  55
+    def initialize source={}, opts={} # rubocop:disable Metrics/MethodLength
       vectors, index = opts[:order], opts[:index] # FIXME: just keyword arges after Ruby 2.1
       @data = []
       @name = opts[:name]
       case source
-      when ->(s) { s.empty? }
-        @vectors = Index.coerce vectors
-        @index   = Index.coerce index
-        create_empty_vectors
+      when [], {}
+        create_empty_vectors(vectors, index)
       when Array
         initialize_from_array source, vectors, index, opts
       when Hash
         initialize_from_hash source, vectors, index, opts
+      when ->(s) { s.empty? } # TODO: likely want to remove this case
+        create_empty_vectors(vectors, index)
       end
       set_size
       validate
       update
-      self.plotting_library = Daru.plotting_library
     end
     def plotting_library= lib
@@ -271,11 +374,18 @@ module Daru
           )
         end
       else
-        raise ArguementError, "Plotting library #{lib} not supported. "\
+        raise ArgumentError, "Plotting library #{lib} not supported. "\
           'Supported libraries are :nyaplot and :gruff'
       end
     end
+    # this method is overwritten: see Daru::DataFrame#plotting_library=
+    def plot(*args, **options, &b)
+      init_plotting_library
+      plot(*args, **options, &b)
+    end
     # Access row or vector. Specify name of row/vector followed by axis(:row, :vector).
     # Defaults to *:vector*. Use of this method is not recommended for accessing
     # rows. Use df.row[:a] for accessing row with index ':a'.
@@ -285,7 +395,7 @@ module Daru
     end
     # Retrieve rows by positions
-    # @param [Array<Integer>] *positions positions of rows to retrive
+    # @param [Array<Integer>] positions of rows to retrieve
     # @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
     # @example
     #   df = Daru::DataFrame.new({
@@ -303,19 +413,17 @@ module Daru
       validate_positions(*positions, nrows)
       if positions.is_a? Integer
-        return Daru::Vector.new @data.map { |vec| vec.at(*positions) },
-          index: @vectors
+        row = get_rows_for([positions])
+        Daru::Vector.new row, index: @vectors
       else
-        new_rows = @data.map { |vec| vec.at(*original_positions) }
-        return Daru::DataFrame.new new_rows,
-          index: @index.at(*original_positions),
-          order: @vectors
+        new_rows = get_rows_for(original_positions)
+        Daru::DataFrame.new new_rows, index: @index.at(*original_positions), order: @vectors
       end
     end
     # Set rows by positions
     # @param [Array<Integer>] positions positions of rows to set
-    # @vector [Array, Daru::Vector] vector vector to be assigned
+    # @param [Array, Daru::Vector] vector vector to be assigned
     # @example
     #   df = Daru::DataFrame.new({
     #     a: [1, 2, 3],
@@ -348,7 +456,7 @@ module Daru
     end
     # Retrieve vectors by positions
-    # @param [Array<Integer>] *positions positions of vectors to retrive
+    # @param [Array<Integer>] positions of vectors to retrieve
     # @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
     # @example
     #   df = Daru::DataFrame.new({
@@ -432,13 +540,24 @@ module Daru
     end
     def add_row row, index=nil
-      self.row[index || @size] = row
+      self.row[*(index || @size)] = row
     end
     def add_vector n, vector
       self[n] = vector
     end
+    def insert_vector n, name, source
+      raise ArgumentError unless source.is_a? Array
+      vector = Daru::Vector.new(source, index: @index, name: @name)
+      @data << vector
+      @vectors = @vectors.add name
+      ordr = @vectors.dup.to_a
+      elmnt = ordr.pop
+      ordr = ordr.insert n, elmnt
+      self.order=ordr
+    end
     # Access a row or set/create a row. Refer #[] and #[]= docs for details.
     #
     # == Usage
@@ -448,6 +567,20 @@ module Daru
       Daru::Accessors::DataFrameByRow.new(self)
     end
+    # Extract a dataframe given row indexes or positions
+    # @param keys [Array] can be positions (if by_position is true) or indexes (if by_position is false)
+    # @return [Daru::DataFrame]
+    def get_sub_dataframe(keys, by_position: true)
+      return Daru::DataFrame.new({}) if keys == []
+      keys = @index.pos(*keys) unless by_position
+      sub_df = row_at(*keys)
+      sub_df = sub_df.to_df.transpose if sub_df.is_a?(Daru::Vector)
+      sub_df
+    end
     # Duplicate the DataFrame entirely.
     #
     # == Arguments
@@ -457,7 +590,7 @@ module Daru
     def dup vectors_to_dup=nil
       vectors_to_dup = @vectors.to_a unless vectors_to_dup
-      src = vectors_to_dup.map { |vec| @data[@vectors[vec]].dup }
+      src = vectors_to_dup.map { |vec| @data[@vectors.pos(vec)].dup }
       new_order = Daru::Index.new(vectors_to_dup)
       Daru::DataFrame.new src, order: new_order, index: @index.dup, name: @name, clone: true
@@ -506,8 +639,8 @@ module Daru
     deprecate :dup_only_valid, :reject_values, 2016, 10
     # Returns a dataframe in which rows with any of the mentioned values
-    #   are ignored.
-    # @param [Array] *values values to reject to form the new dataframe
+    # are ignored.
+    # @param [Array] values to reject to form the new dataframe
     # @return [Daru::DataFrame] Data Frame with only rows which doesn't
     #   contain the mentioned values
     # @example
@@ -544,7 +677,7 @@ module Daru
     #     b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   8],
     #     c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
     #   }, index: 11..18)
-    #   df
+    #   df.replace_values nil, Float::NAN
     #   # => #<Daru::DataFrame(8x3)>
     #   #       a   b   c
     #   #   11   1   a   a
@@ -560,6 +693,89 @@ module Daru
       self
     end
+    # Rolling fillna
+    # Replace all Float::NAN and nil values with the preceding or following value.
+    #
+    # @param direction [Symbol] (:forward, :backward) whether the replacement value is the preceding or the following one
+    #
+    # @example
+    #   df = Daru::DataFrame.new({
+    #    a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
+    #    b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   nil],
+    #    c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
+    #   })
+    #
+    #   => #<Daru::DataFrame(8x3)>
+    #        a   b   c
+    #    0   1   a   a
+    #    1   2   b NaN
+    #    2   3 nil   3
+    #    3 nil NaN   4
+    #    4 NaN nil   3
+    #    5 nil   3   5
+    #    6   1   5 nil
+    #    7   7 nil   7
+    #
+    #   2.3.3 :068 > df.rolling_fillna(:forward)
+    #   => #<Daru::DataFrame(8x3)>
+    #        a   b   c
+    #    0   1   a   a
+    #    1   2   b   a
+    #    2   3   b   3
+    #    3   3   b   4
+    #    4   3   b   3
+    #    5   3   3   5
+    #    6   1   5   5
+    #    7   7   5   7
+    #
+    def rolling_fillna!(direction=:forward)
+      @data.each { |vec| vec.rolling_fillna!(direction) }
+      self
+    end
+    def rolling_fillna(direction=:forward)
+      dup.rolling_fillna!(direction)
+    end
+    # Return unique rows by vector specified or all vectors
+    #
+    # @param vtrs [String, Symbol] vector name(s) that should be considered
+    #
+    # @example
+    #
+    #    => #<Daru::DataFrame(6x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #     2   3   c
+    #     3   4   f
+    #
+    #    2.3.3 :> df.uniq
+    #    => #<Daru::DataFrame(5x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #     3   4   f
+    #
+    #    2.3.3 :> df.uniq(:a)
+    #    => #<Daru::DataFrame(4x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #
+    def uniq(*vtrs)
+      vecs = vtrs.empty? ? vectors.to_a : Array(vtrs)
+      grouped = group_by(vecs)
+      indexes = grouped.groups.values.map { |v| v[0] }.sort
+      row[*indexes]
+    end
     # Iterate over each index of the DataFrame.
     def each_index &block
       return to_enum(:each_index) unless block_given?
@@ -679,7 +895,7 @@ module Daru
     # * +axis+ - The axis to map over. Can be :vector (or :column) or :row.
     # Default to :vector.
     def map! axis=:vector, &block
-      if axis == :vector || axis == :column
+      if %i[vector column].include?(axis)
         map_vectors!(&block)
       elsif axis == :row
         map_rows!(&block)
@@ -807,6 +1023,18 @@ module Daru
       self
     end
+    def apply_method(method, keys: nil, by_position: true)
+      df = keys ? get_sub_dataframe(keys, by_position: by_position) : self
+      case method
+      when Symbol then df.send(method)
+      when Proc   then method.call(df)
+      when Array  then method.map(&:to_proc).map { |proc| proc.call(df) } # works with Array of both Symbol and/or Proc
+      else raise
+      end
+    end
+    alias :apply_method_on_sub_df :apply_method
     # Retrieves a Daru::Vector, based on the result of calculation
     # performed on each row.
     def collect_rows &block
@@ -913,7 +1141,7 @@ module Daru
     # creates a new vector with the data of a given field which the block returns true
     def filter_vector vec, &block
-      Daru::Vector.new each_row.select(&block).map { |row| row[vec] }
+      Daru::Vector.new(each_row.select(&block).map { |row| row[vec] })
     end
     # Iterates over each row and retains it in a new DataFrame if the block returns
@@ -934,9 +1162,9 @@ module Daru
       dup.tap { |df| df.keep_vector_if(&block) }
     end
-    # Test each row with one or more tests. Each test is a Proc with the form
-    # *Proc.new {|row| row[:age] > 0}*
-    #
+    # Test each row with one or more tests.
+    # @param tests [Proc]  Each test is a Proc with the form
+    #                      *Proc.new {|row| row[:age] > 0}*
     # The function returns an array with all errors.
     #
     # FIXME: description here is too sparse. As far as I can get,
@@ -1031,14 +1259,14 @@ module Daru
     alias :vector_missing_values :missing_values_rows
     def has_missing_data?
-      !!@data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
+      @data.any? { |vec| vec.include_values?(*Daru::MISSING_VALUES) }
     end
     alias :flawed? :has_missing_data?
     deprecate :has_missing_data?, :include_values?, 2016, 10
     deprecate :flawed?, :include_values?, 2016, 10
     # Check if any of given values occur in the data frame
-    # @param [Array] *values values to check for
+    # @param [Array] values to check for
     # @return [true, false] true if any of the given values occur in the
     #   dataframe, false otherwise
     # @example
@@ -1119,7 +1347,7 @@ module Daru
     #     row[:a] < 3 and row[:b] == 'b'
     #   end #=> true
     def any? axis=:vector, &block
-      if axis == :vector || axis == :column
+      if %i[vector column].include?(axis)
         @data.any?(&block)
       elsif axis == :row
         each_row do |row|
@@ -1141,7 +1369,7 @@ module Daru
     #     row[:a] < 10
     #   end #=> true
     def all? axis=:vector, &block
-      if axis == :vector || axis == :column
+      if %i[vector column].include?(axis)
         @data.all?(&block)
       elsif axis == :row
         each_row.all?(&block)
@@ -1169,13 +1397,60 @@ module Daru
     alias :last :tail
-    # Returns a vector with sum of all vectors specified in the argument.
-    # If vecs parameter is empty, sum all numeric vector.
-    def vector_sum vecs=nil
+    # Sum all numeric/specified vectors in the DataFrame.
+    #
+    # Returns a new vector containing the sum of all numeric
+    # or specified vectors of the DataFrame. By default, if a vector
+    # contains a nil, the sum is nil.
+    # With :skipnil argument set to true, nil values are assumed to be
+    # 0 (zero) and the sum vector is returned.
+    #
+    # @param args [Array] List of vectors to sum. Default is nil in which case
+    #   all numeric vectors are summed.
+    #
+    # @option opts [Boolean] :skipnil Consider nils as 0. Default is false.
+    #
+    # @return Vector with sum of all vectors specified in the argument.
+    #   If vecs parameter is empty, sum all numeric vector.
+    #
+    # @example
+    #    df = Daru::DataFrame.new({
+    #       a: [1, 2, nil],
+    #       b: [2, 1, 3],
+    #       c: [1, 1, 1]
+    #     })
+    #    => #<Daru::DataFrame(3x3)>
+    #           a   b   c
+    #       0   1   2   1
+    #       1   2   1   1
+    #       2 nil   3   1
+    #    df.vector_sum [:a, :c]
+    #    => #<Daru::Vector(3)>
+    #       0   2
+    #       1   3
+    #       2 nil
+    #    df.vector_sum
+    #    => #<Daru::Vector(3)>
+    #       0   4
+    #       1   4
+    #       2 nil
+    #    df.vector_sum skipnil: true
+    #    => #<Daru::Vector(3)>
+    #           c
+    #       0   4
+    #       1   4
+    #       2   4
+    #
+    def vector_sum(*args)
+      defaults = {vecs: nil, skipnil: false}
+      options = args.last.is_a?(::Hash) ? args.pop : {}
+      options = defaults.merge(options)
+      vecs = args[0] || options[:vecs]
+      skipnil = args[1] || options[:skipnil]
       vecs ||= numeric_vectors
       sum = Daru::Vector.new [0]*@size, index: @index, name: @name, dtype: @dtype
-      vecs.inject(sum) { |memo, n| memo + self[n] }
+      vecs.inject(sum) { |memo, n| self[n].add(memo, skipnil: skipnil) }
     end
     # Calculate mean of the rows of the dataframe.
@@ -1220,11 +1495,10 @@ module Daru
     #   # ["foo", "two", 3]=>[2, 4]}
     def group_by *vectors
       vectors.flatten!
-      # FIXME: wouldn't it better to do vectors - @vectors here and
-      # raise one error with all non-existent vector names?.. - zverok, 2016-05-18
-      vectors.each { |v|
-        raise(ArgumentError, "Vector #{v} does not exist") unless has_vector?(v)
-      }
+      missing = vectors - @vectors.to_a
+      unless missing.empty?
+        raise(ArgumentError, "Vector(s) missing: #{missing.join(', ')}")
+      end
       vectors = [@vectors.first] if vectors.empty?
@@ -1234,7 +1508,7 @@ module Daru
     def reindex_vectors new_vectors
       unless new_vectors.is_a?(Daru::Index)
         raise ArgumentError, 'Must pass the new index of type Index or its '\
-          "subclasses, not #{new_index.class}"
+          "subclasses, not #{new_vectors.class}"
       end
       cl = Daru::DataFrame.new({}, order: new_vectors, index: @index, name: @name)
@@ -1272,14 +1546,52 @@ module Daru
       df
     end
+    module SetSingleIndexStrategy
+      def self.uniq_size(df, col)
+        df[col].uniq.size
+      end
+      def self.new_index(df, col)
+        Daru::Index.new(df[col].to_a)
+      end
+      def self.delete_vector(df, col)
+        df.delete_vector(col)
+      end
+    end
+    module SetMultiIndexStrategy
+      def self.uniq_size(df, cols)
+        df[*cols].uniq.size
+      end
+      def self.new_index(df, cols)
+        Daru::MultiIndex.from_arrays(df[*cols].map_vectors(&:to_a)).tap do |mi|
+          mi.name = cols
+          mi
+        end
+      end
+      def self.delete_vector(df, cols)
+        df.delete_vectors(*cols)
+      end
+    end
     # Set a particular column (or an Array of columns) as the new index of the DF
-    def set_index new_index, opts={}
-      raise ArgumentError, 'All elements in new index must be unique.' if
-        @size != self[new_index].uniq.size
+    def set_index new_index_col, opts={}
+      if new_index_col.respond_to?(:to_a)
+        strategy = SetMultiIndexStrategy
+        new_index_col = new_index_col.to_a
+      else
+        strategy = SetSingleIndexStrategy
+      end
-      self.index = Daru::Index.new(self[new_index].to_a)
-      delete_vector(new_index) unless opts[:keep]
+      uniq_size = strategy.uniq_size(self, new_index_col)
+      raise ArgumentError, 'All elements in new index must be unique.' if
+        @size != uniq_size
+      self.index = strategy.new_index(self, new_index_col)
+      strategy.delete_vector(self, new_index_col) unless opts[:keep]
       self
     end
@@ -1317,11 +1629,24 @@ module Daru
       end
     end
+    def reset_index
+      index_df = index.to_df
+      names = index.name
+      names = [names] unless names.instance_of?(Array)
+      new_vectors = names + vectors.to_a
+      self.index = index_df.index
+      names.each do |name|
+        self[name] = index_df[name]
+      end
+      self.order = new_vectors
+      self
+    end
     # Reassign index with a new index of type Daru::Index or any of its subclasses.
     #
     # @param [Daru::Index] idx New index object on which the rows of the dataframe
     #   are to be indexed.
-    # @example Reassgining index of a DataFrame
+    # @example Reassigning index of a DataFrame
     #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [11,22,33,44]})
     #   df.index.to_a #=> [0,1,2,3]
     #
@@ -1337,7 +1662,7 @@ module Daru
     # Reassign vectors with a new index of type Daru::Index or any of its subclasses.
     #
-    # @param [Daru::Index] idx The new index object on which the vectors are to
+    # @param new_index [Daru::Index] idx The new index object on which the vectors are to
     #   be indexed. Must of the same size as ncols.
     # @example Reassigning vectors of a DataFrame
     #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
@@ -1377,13 +1702,31 @@ module Daru
     #   df.rename_vectors :a => :alpha, :c => :gamma
     #   df.vectors.to_a #=> [:alpha, :b, :gamma]
     def rename_vectors name_map
+      # Targets of a genuine rename (key != value) that already exist as vectors
+      # are deleted first, so the renamed vector takes their place.
-      existing_targets = name_map.select { |k,v| k != v }.values & vectors.to_a
+      existing_targets = name_map.reject { |k,v| k == v }.values & vectors.to_a
       delete_vectors(*existing_targets)
+      # Vectors without a truthy entry in name_map keep their current name.
       new_names = vectors.to_a.map { |v| name_map[v] ? name_map[v] : v }
       self.vectors = Daru::Index.new new_names
     end
+    # Renames vectors in place and returns the DataFrame itself, so calls can be
+    # chained.
+    #
+    # == Arguments
+    #
+    # * name_map - A hash mapping existing vector names to their new names.
+    #              If a new name is already taken by another vector, that
+    #              vector is overwritten.
+    #
+    # == Usage
+    #
+    #   df = Daru::DataFrame.new({ a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44] })
+    #   df.rename_vectors! :a => :alpha, :c => :gamma # df
+    def rename_vectors! name_map
+      tap { rename_vectors(name_map) }
+    end
     # Return the indexes of all the numeric vectors. Will include vectors with nils
     # along with numbers.
     def numeric_vectors
@@ -1408,27 +1751,24 @@ module Daru
       Daru::DataFrame.new(arry, clone: cln, order: order, index: @index)
     end
-    # Generate a summary of this DataFrame with ReportBuilder.
-    def summary(method=:to_text)
-      ReportBuilder.new(no_title: true).add(self).send(method)
-    end
-    def report_building(b) # :nodoc: #
-      b.section(name: @name) do |g|
-        g.text "Number of rows: #{nrows}"
-        @vectors.each do |v|
-          g.text "Element:[#{v}]"
-          g.parse_element(self[v])
-        end
+    # Generate a plain-text summary of this DataFrame: a "= name" title line, the
+    # number of rows, and then the summary of every vector in the DataFrame.
+    # @return [String] String containing the summary of the DataFrame
+    def summary
+      # NOTE: this local shares its name with the method; it is just the output buffer.
+      summary = "= #{name}"
+      summary << "\n  Number of rows: #{nrows}"
+      @vectors.each do |v|
+        summary << "\n  Element:[#{v}]\n"
+        # The argument 1 is passed to the vector's own #summary -- presumably an
+        # indentation level; confirm in Daru::Vector#summary.
+        summary << self[v].summary(1)
       end
+      summary
     end
     # Sorts a dataframe (ascending/descending) in the given priority sequence of
     # vectors, with or without a block.
     #
-    # @param order [Array] The order of vector names in which the DataFrame
+    # @param vector_order [Array] The order of vector names in which the DataFrame
     #   should be sorted.
-    # @param [Hash] opts The options to sort with.
+    # @param opts [Hash] The options to sort with.
     # @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
     #   or descending order. Specify Array corresponding to *order* for multiple
     #   sort orders.
@@ -1597,12 +1937,11 @@ module Daru
       new_fields = (@vectors.to_a + other_df.vectors.to_a)
       new_fields = ArrayHelper.recode_repeated(new_fields)
       DataFrame.new({}, order: new_fields).tap do |df_new|
         (0...nrows).each do |i|
           df_new.add_row row[i].to_a + other_df.row[i].to_a
         end
+        df_new.index = @index if @index == other_df.index
         df_new.update
       end
     end
@@ -1783,7 +2122,9 @@ module Daru
     end
     # Convert to html for IRuby.
-    def to_html threshold=30
+    def to_html(threshold=Daru.max_rows)
+      table_thead = to_html_thead
+      table_tbody = to_html_tbody(threshold)
       path = if index.is_a?(MultiIndex)
                File.expand_path('../iruby/templates/dataframe_mi.html.erb', __FILE__)
              else
@@ -1792,8 +2133,29 @@ module Daru
       ERB.new(File.read(path).strip).result(binding)
     end
+    # Renders the table header section of the IRuby HTML output.
+    #
+    # Picks the MultiIndex or the plain header ERB template depending on the type
+    # of +index+, reads it from disk and evaluates it against this method's binding.
+    #
+    # @return [String] the rendered template
+    def to_html_thead
+      table_thead_path =
+        if index.is_a?(MultiIndex)
+          File.expand_path('../iruby/templates/dataframe_mi_thead.html.erb', __FILE__)
+        else
+          File.expand_path('../iruby/templates/dataframe_thead.html.erb', __FILE__)
+        end
+      # The template is evaluated with this method's binding, so it sees self and
+      # the locals defined here.
+      ERB.new(File.read(table_thead_path).strip).result(binding)
+    end
+    # Renders the table body section of the IRuby HTML output.
+    #
+    # Picks the MultiIndex or the plain body ERB template depending on the type of
+    # +index+, reads it from disk and evaluates it against this method's binding.
+    #
+    # @param threshold defaults to Daru.max_rows; a nil (or false) value is replaced
+    #   by @size. Presumably the row limit the template reads through the binding
+    #   -- confirm against the template.
+    # @return [String] the rendered template
+    def to_html_tbody(threshold=Daru.max_rows)
+      # Fall back to @size when no threshold is given.
+      threshold ||= @size
+      table_tbody_path =
+        if index.is_a?(MultiIndex)
+          File.expand_path('../iruby/templates/dataframe_mi_tbody.html.erb', __FILE__)
+        else
+          File.expand_path('../iruby/templates/dataframe_tbody.html.erb', __FILE__)
+        end
+      ERB.new(File.read(table_tbody_path).strip).result(binding)
+    end
     def to_s
+      # One-line description: class, then ": name" only when @name is set, then
+      # rows x columns. It no longer returns the HTML rendering.
-      to_html
+      "#<#{self.class}#{': ' + @name.to_s if @name}(#{nrows}x#{ncols})>"
     end
     # Method for updating the metadata (i.e. missing value positions) of the
@@ -1815,7 +2177,7 @@ module Daru
     # Write this DataFrame to a CSV file.
     #
-    # == Arguements
+    # == Arguments
     #
     # * filename - Path of CSV file where the DataFrame is to be saved.
     #
@@ -1899,15 +2261,15 @@ module Daru
     end
     # Pretty print in a nice table format for the command line (irb/pry/iruby)
-    def inspect spacing=10, threshold=15
-      row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
+    def inspect spacing=Daru.spacing, threshold=Daru.max_rows
       name_part = @name ? ": #{@name} " : ''
+      spacing = [headers.to_a.map(&:length).max, spacing].max
-      "#<#{self.class}#{name_part}(#{nrows}x#{ncols})>\n" +
+      "#<#{self.class}#{name_part}(#{nrows}x#{ncols})>#{$INPUT_RECORD_SEPARATOR}" +
         Formatters::Table.format(
           each_row.lazy,
           row_headers: row_headers,
-          headers: vectors,
+          headers: headers,
           threshold: threshold,
           spacing: spacing
         )
@@ -1927,7 +2289,7 @@ module Daru
     end
     # Converts the specified non category type vectors to category type vectors
-    # @param [Array] *names names of non category type vectors to be converted
+    # @param [Array] names of non category type vectors to be converted
     # @return [Daru::DataFrame] data frame in which specified vectors have been
     #   converted to category type
     # @example
@@ -1992,7 +2354,7 @@ module Daru
     #   #   2   3]
     def split_by_category cat_name
       cat_dv = self[cat_name]
-      raise ArguementError, "#{cat_name} is not a category vector" unless
+      raise ArgumentError, "#{cat_name} is not a category vector" unless
         cat_dv.category?
       cat_dv.categories.map do |cat|
@@ -2002,8 +2364,128 @@ module Daru
       end
     end
+    # @param indexes [Array] index(s) at which row tuples are retrieved
+    # @return [Array] returns array of row tuples at given index(s)
+    # @example Using Daru::Index
+    #   df = Daru::DataFrame.new({
+    #     a: [1, 2, 3],
+    #     b: ['a', 'a', 'b']
+    #   })
+    #
+    #   df.access_row_tuples_by_indexs(1,2)
+    #   # => [[2, "a"], [3, "b"]]
+    #
+    #   df.index = Daru::Index.new([:one,:two,:three])
+    #   df.access_row_tuples_by_indexs(:one,:three)
+    #   # => [[1, "a"], [3, "b"]]
+    #
+    # @example Using Daru::MultiIndex
+    #   mi_idx = Daru::MultiIndex.from_tuples [
+    #     [:a,:one,:bar],
+    #     [:a,:one,:baz],
+    #     [:b,:two,:bar],
+    #     [:a,:two,:baz],
+    #   ]
+    #   df_mi = Daru::DataFrame.new({
+    #     a: 1..4,
+    #     b: 'a'..'d'
+    #   }, index: mi_idx )
+    #
+    #   df_mi.access_row_tuples_by_indexs(:b, :two, :bar)
+    #   # => [[3, "c"]]
+    #   df_mi.access_row_tuples_by_indexs(:a)
+    #   # => [[1, "a"], [2, "b"], [4, "d"]]
+    def access_row_tuples_by_indexs *indexes
+      # MultiIndex lookups go through a sub-dataframe; each of its rows is
+      # converted to a plain array.
+      if @index.is_a?(Daru::MultiIndex)
+        return get_sub_dataframe(indexes, by_position: false).map_rows(&:to_a)
+      end
+      positions = @index.pos(*indexes)
+      unless positions.is_a? Numeric
+        # Several matches: get_rows_for yields one entry per vector, so pick each
+        # requested index out of every entry to form the tuples.
+        per_vector = get_rows_for(indexes, by_position: false)
+        return indexes.map { |idx| per_vector.map { |entry| entry[idx] } }
+      end
+      # A single position: wrap a flat row so the result is always a list of tuples.
+      single = get_rows_for([positions])
+      single.first.is_a?(Array) ? single : [single]
+    end
+    # Function to use for aggregating the data.
+    #
+    # @param options [Hash] options for column, you want in resultant dataframe
+    #
+    # @return [Daru::DataFrame]
+    #
+    # @example
+    #   df = Daru::DataFrame.new(
+    #      {col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]})
+    #   => #<Daru::DataFrame(5x2)>
+    #        col num
+    #      0   a  52
+    #      1   b  12
+    #      2   c   7
+    #      3   d  17
+    #      4   e   1
+    #
+    #    df.aggregate(num_100_times: ->(df) { (df.num*100).first })
+    #   => #<Daru::DataFrame(5x1)>
+    #               num_100_ti
+    #             0       5200
+    #             1       1200
+    #             2        700
+    #             3       1700
+    #             4        100
+    #
+    #   When we have duplicate index :
+    #
+    #   idx = Daru::CategoricalIndex.new [:a, :b, :a, :a, :c]
+    #   df = Daru::DataFrame.new({num: [52,12,07,17,01]}, index: idx)
+    #   => #<Daru::DataFrame(5x1)>
+    #        num
+    #      a  52
+    #      b  12
+    #      a   7
+    #      a  17
+    #      c   1
+    #
+    #   df.aggregate(num: :mean)
+    #   => #<Daru::DataFrame(3x1)>
+    #                      num
+    #             a 25.3333333
+    #             b         12
+    #             c          1
+    #
+    # Note: `GroupBy` class `aggregate` method uses this `aggregate` method
+    # internally.
+    def aggregate(options={}, multi_index_level=-1)
+      # A block, when given, receives the index and supplies the grouping itself;
+      # otherwise the grouping is derived from the index.
+      positions_tuples, new_index =
+        if block_given?
+          yield(@index) # note: use of yield is private for now
+        else
+          group_index_for_aggregation(@index, multi_index_level)
+        end
+      column_values = aggregate_by_positions_tuples(options, positions_tuples)
+      Daru::DataFrame.new(column_values, index: new_index, order: options.keys)
+    end
+    # Groups the DataFrame by the given keys and aggregates the result in one call.
+    #
+    # @param group_by_keys passed on to #group_by
+    # @param aggregation_map keyword arguments, handed as a single Hash to the
+    #   #aggregate method of the object #group_by returns
+    # @return whatever that #aggregate call returns
+    def group_by_and_aggregate(*group_by_keys, **aggregation_map)
+      grouping = group_by(*group_by_keys)
+      grouping.aggregate(aggregation_map)
+    end
     private
+    # Sets this dataframe's plotting library to the one configured on Daru
+    # (Daru.plotting_library), through the plotting_library= writer.
+    # NOTE(review): described upstream as lazily loading the plotting library --
+    # presumably the writer pulls in the backend; confirm in plotting_library=.
+    def init_plotting_library
+      self.plotting_library = Daru.plotting_library
+    end
+    # Column headers used when printing: the index name(s) followed by the
+    # vector names, wrapped in a Daru::Index.
+    def headers
+      index_names = Array(index.name)
+      Daru::Index.new(index_names + @vectors.to_a)
+    end
+    # Row labels used when printing: sparse tuples for a MultiIndex, the plain
+    # index entries otherwise.
+    def row_headers
+      return index.sparse_tuples if index.is_a?(MultiIndex)
+      index.to_a
+    end
     def convert_categorical_vectors names
       names.map do |n|
         next unless self[n].category?
@@ -2034,7 +2516,7 @@ module Daru
     end
     def dispatch_to_axis(axis, method, *args, &block)
-      if axis == :vector || axis == :column
+      if %i[vector column].include?(axis)
         send("#{method}_vector", *args, &block)
       elsif axis == :row
         send("#{method}_row", *args, &block)
@@ -2044,7 +2526,7 @@ module Daru
     end
     def dispatch_to_axis_pl(axis, method, *args, &block)
-      if axis == :vector || axis == :column
+      if %i[vector column].include?(axis)
         send("#{method}_vectors", *args, &block)
       elsif axis == :row
         send("#{method}_rows", *args, &block)
@@ -2053,7 +2535,7 @@ module Daru
       end
     end
-    AXES = [:row, :vector].freeze
+    AXES = %i[row vector].freeze
     def extract_axis names, default=:vector
       if AXES.include?(names.last)
@@ -2065,7 +2547,7 @@ module Daru
     def access_vector *names
       if names.first.is_a?(Range)
-        dup(@vectors[names.first])
+        dup(@vectors.subset(names.first))
       elsif @vectors.is_a?(MultiIndex)
         access_vector_multi_index(*names)
       else
@@ -2087,14 +2569,16 @@ module Daru
     def access_vector_single_index *names
       if names.count < 2
-        pos = @vectors[names.first]
+        begin
+          pos = @vectors.is_a?(Daru::DateTimeIndex) ? @vectors[names.first] : @vectors.pos(names.first)
+        rescue IndexError
+          raise IndexError, "Specified vector #{names.first} does not exist"
+        end
         return @data[pos] if pos.is_a?(Numeric)
         names = pos
       end
-      new_vectors = names.map { |name| [name, @data[@vectors[name]]] }.to_h
+      new_vectors = names.map { |name| [name, @data[@vectors.pos(name)]] }.to_h
       order = names.is_a?(Array) ? Daru::Index.new(names) : names
       Daru::DataFrame.new(new_vectors, order: order,
@@ -2105,19 +2589,30 @@ module Daru
       positions = @index.pos(*indexes)
       if positions.is_a? Numeric
-        return Daru::Vector.new populate_row_for(positions),
-          index: @vectors,
-          name: indexes.first
+        row = get_rows_for([positions])
+        Daru::Vector.new row, index: @vectors, name: indexes.first
       else
-        new_rows = @data.map { |vec| vec[*indexes] }
-        return Daru::DataFrame.new new_rows,
-          index: @index.subset(*indexes),
-          order: @vectors
+        new_rows = get_rows_for(indexes, by_position: false)
+        Daru::DataFrame.new new_rows, index: @index.subset(*indexes), order: @vectors
       end
     end
-    def populate_row_for pos
-      @data.map { |vector| vector.at(*pos) }
+    # Fetches the requested rows by asking every vector in @data for the given keys.
+    #
+    # @param keys [Array] positions (when by_position is true) or indexes (when
+    #   by_position is false)
+    # @param by_position [Boolean] true to look up with Daru::Vector#at, false to
+    #   look up with Daru::Vector#[]
+    # @return [Array] one entry per vector; because of coercion by Daru::Vector#at
+    #   and Daru::Vector#[], either an Array of values (representing a row) or an
+    #   Array of Vectors (that can be seen as rows)
+    # @raise [RuntimeError] if keys is not an Array
+    def get_rows_for(keys, by_position: true)
+      # A bare `raise` would re-raise $! or report "unhandled exception"; say what
+      # actually went wrong instead (still a RuntimeError).
+      raise "keys must be an Array, got #{keys.class}" unless keys.is_a?(Array)
+      return @data.map { |vector| vector.at(*keys) } if by_position
+      # TODO: for now (2018-07-27), it is different than using
+      #    get_rows_for(@index.pos(*keys))
+      #    because Daru::Vector#at and Daru::Vector#[] don't handle Daru::MultiIndex the same way
+      @data.map { |vec| vec[*keys] }
     end
     def insert_or_modify_vector name, vector
@@ -2126,7 +2621,7 @@ module Daru
       if @index.empty?
         insert_vector_in_empty name, vector
       else
-        vec = prepare_vector_for_insert name, vector
+        vec = prepare_for_insert name, vector
         assign_or_add_vector name, vec
       end
@@ -2173,25 +2668,35 @@ module Daru
       @data.map! { |v| v.empty? ? v.reindex(@index) : v }
     end
-    def prepare_vector_for_insert name, vector
-      if vector.is_a?(Daru::Vector)
-        # so that index-by-index assignment is avoided when possible.
-        return vector.dup if vector.index == @index
-        Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
-          @index.each do |idx|
-            v[idx] = vector.index.include?(idx) ? vector[idx] : nil
-          end
-        }
+    # Builds the Daru::Vector to insert under +name+, dispatching on the kind of
+    # +arg+: a Daru::Vector, anything responding to #to_a, or any other value.
+    # NOTE(review): nil also responds to #to_a, so a nil argument takes the enum
+    # branch -- confirm that is intended.
+    def prepare_for_insert name, arg
+      if arg.is_a? Daru::Vector
+        prepare_vector_for_insert name, arg
+      elsif arg.respond_to?(:to_a)
+        prepare_enum_for_insert name, arg
       else
-        # FIXME: No spec checks this case... And SizeError is not a thing - zverok, 2016-05-08
-        if @size != vector.size
-          raise SizeError,
-            "Specified vector of length #{vector.size} cannot be inserted in DataFrame of size #{@size}"
+        prepare_value_for_insert name, arg
+      end
+    end
+    # Returns a copy of +vector+ when it already shares this DataFrame's index;
+    # otherwise builds a new vector over @index, taking values from +vector+ where
+    # its index has the entry and nil where it does not.
+    def prepare_vector_for_insert name, vector
+      # so that index-by-index assignment is avoided when possible.
+      return vector.dup if vector.index == @index
+      Daru::Vector.new([], name: coerce_name(name), index: @index).tap { |v|
+        @index.each do |idx|
+          v[idx] = vector.index.include?(idx) ? vector[idx] : nil
         end
+      }
+    end
-        Daru::Vector.new(vector, name: coerce_name(name), index: @index)
+    # Wraps +enum+ in a Daru::Vector over @index. Raises a RuntimeError when its
+    # size differs from the DataFrame's @size.
+    def prepare_enum_for_insert name, enum
+      if @size != enum.size
+        raise "Specified vector of length #{enum.size} cannot be inserted in DataFrame of size #{@size}"
       end
+      Daru::Vector.new(enum, name: coerce_name(name), index: @index)
+    end
+    # Repeats a single +value+ @size times and wraps the result in a Daru::Vector
+    # over @index.
+    def prepare_value_for_insert name, value
+      Daru::Vector.new(Array(value) * @size, name: coerce_name(name), index: @index)
     end
     def insert_or_modify_row indexes, vector
@@ -2208,7 +2713,10 @@ module Daru
       set_size
     end
-    def create_empty_vectors
+    def create_empty_vectors(vectors, index)
+      @vectors = Index.coerce vectors
+      @index   = Index.coerce index
       @data = @vectors.map do |name|
         Daru::Vector.new([], name: coerce_name(name), index: @index)
       end
@@ -2250,7 +2758,7 @@ module Daru
     end
     def create_vectors_index_with vectors, source
-      vectors = source.keys.sort_by(&:to_s) if vectors.nil?
+      vectors = source.keys if vectors.nil?
       @vectors =
         if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
@@ -2276,8 +2784,10 @@ module Daru
       case source.first
       when Array
+        vectors ||= (0..source.size-1).to_a
         initialize_from_array_of_arrays source, vectors, index, opts
       when Vector
+        vectors ||= (0..source.size-1).to_a
         initialize_from_array_of_vectors source, vectors, index, opts
       when Hash
         initialize_from_array_of_hashes source, vectors, index, opts
@@ -2295,9 +2805,7 @@ module Daru
       @index   = Index.coerce(index || source[0].size)
       @vectors = Index.coerce(vectors)
-      @data = @vectors.each_with_index.map do |_vec,idx|
-        Daru::Vector.new(source[idx], index: @index, name: vectors[idx])
-      end
+      update_data source, vectors
     end
     def initialize_from_array_of_vectors source, vectors, index, opts
@@ -2528,7 +3036,6 @@ module Daru
     # Raises IndexError when one of the positions is not a valid position
     def validate_positions *positions, size
-      positions = [positions] if positions.is_a? Integer
       positions.each do |pos|
         raise IndexError, "#{pos} is not a valid position." if pos >= size
       end
@@ -2546,6 +3053,73 @@ module Daru
       end
     end
+    # Rebuilds @data with one Daru::Vector per entry of @vectors. The vector at
+    # a given position takes its values from source and its name from vectors at
+    # that same position, and is indexed by @index.
+    def update_data source, vectors
+      @data = @vectors.each_with_index.map { |_name, position|
+        Daru::Vector.new(source[position], index: @index, name: vectors[position])
+      }
+    end
+    # Computes the aggregated values for every requested output column.
+    #
+    # When cast_aggregation_options reports that all options target vectors, each
+    # aggregation is applied to the matching sub-vector of every group. Otherwise
+    # all aggregations are applied to a sub-dataframe per group and the per-group
+    # results are transposed.
+    def aggregate_by_positions_tuples(options, positions_tuples)
+      vectors_only, options = cast_aggregation_options(options)
+      unless vectors_only
+        aggregators = options.values
+        # note: because we aggregate over rows, we don't have to re-get sub-dfs for each method (which is expensive)
+        per_group = positions_tuples.map { |positions| apply_method_on_sub_df(aggregators, keys: positions) }
+        return per_group.transpose
+      end
+      options.map do |vector_name, aggregator|
+        vector = self[vector_name]
+        positions_tuples.map { |positions| vector.apply_method_on_sub_vector(aggregator, keys: positions) }
+      end
+    end
+    # convert operations over sub-vectors to operations over sub-dfs when it improves perf
+    # note: we don't always "cast" because aggregation over a single vector / a few vector is faster
+    #   than aggregation over (sub-)dfs
+    #
+    # @param options [Hash] output column name => aggregation; it is not modified
+    # @return [Array] pair [over_vectors, options]. over_vectors is true when no
+    #   key is missing from @vectors, and options is then returned as given.
+    #   Otherwise it is false and options is a copy in which every aggregation
+    #   keyed by a vector name is wrapped in a lambda taking a sub-dataframe.
+    def cast_aggregation_options(options)
+      vects, non_vects = options.keys.partition { |k| @vectors.include?(k) }
+      return [true, options] unless non_vects.any?
+      # dup rather than clone: clone would carry over a frozen state and make the
+      # assignments below raise for a frozen options hash.
+      options = options.dup
+      vects.each do |name|
+        proc_on_vect = options[name].to_proc
+        options[name] = ->(sub_df) { proc_on_vect.call(sub_df[name]) }
+      end
+      [false, options]
+    end
+    # Works out how rows are grouped for #aggregate, based on the type of index.
+    #
+    # @param index the index to group on
+    # @param multi_index_level forwarded to
+    #   Daru::Core::GroupBy.get_positions_group_for_aggregation for a MultiIndex
+    # @return [Array] pair [pos_tuples, new_index]: the row positions of each
+    #   group and the index to use for the aggregated result
+    # @raise [RuntimeError] if index is none of the handled index classes
+    def group_index_for_aggregation(index, multi_index_level=-1)
+      case index
+      when Daru::MultiIndex
+        groups_by_pos = Daru::Core::GroupBy.get_positions_group_for_aggregation(index, multi_index_level)
+        new_index = Daru::MultiIndex.from_tuples(groups_by_pos.keys).coerce_index
+        pos_tuples = groups_by_pos.values
+      when Daru::Index, Daru::CategoricalIndex
+        # One group per distinct index entry, holding every position it occurs at.
+        new_index = Array(index).uniq
+        pos_tuples = new_index.map { |idx| [*index.pos(idx)] }
+      else
+        # A bare `raise` gives no hint about the cause; name the offending class
+        # (still a RuntimeError).
+        raise "Cannot group an index of type #{index.class} for aggregation"
+      end
+      [pos_tuples, new_index]
+    end
     # coerce ranges, integers and array in appropriate ways
     def coerce_positions *positions, size
       if positions.size == 1
@@ -2555,7 +3129,7 @@ module Daru
         when Range
           size.times.to_a[positions.first]
         else
-          raise ArgumentError, 'Unkown position type.'
+          raise ArgumentError, 'Unknown position type.'
         end
       else
         positions