RubyGems - daru - Versions diffs - 0.1.6 → 0.2.0 - Mend

daru 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +4 -4
data/.github/ISSUE_TEMPLATE.md +18 -0
data/.rubocop.yml +1 -0
data/.travis.yml +5 -0
data/History.md +28 -0
data/README.md +6 -0
data/ReleasePolicy.md +20 -0
data/daru.gemspec +4 -0
data/lib/daru.rb +1 -2
data/lib/daru/category.rb +15 -10
data/lib/daru/core/group_by.rb +51 -8
data/lib/daru/dataframe.rb +267 -28
data/lib/daru/date_time/index.rb +1 -1
data/lib/daru/date_time/offsets.rb +1 -1
data/lib/daru/extensions/which_dsl.rb +55 -0
data/lib/daru/index/categorical_index.rb +4 -4
data/lib/daru/index/index.rb +5 -5
data/lib/daru/index/multi_index.rb +11 -2
data/lib/daru/io/io.rb +1 -1
data/lib/daru/maths/arithmetic/vector.rb +38 -2
data/lib/daru/maths/statistics/dataframe.rb +19 -19
data/lib/daru/maths/statistics/vector.rb +225 -78
data/lib/daru/plotting/nyaplot/dataframe.rb +11 -0
data/lib/daru/vector.rb +55 -13
data/lib/daru/version.rb +1 -1
data/profile/vector_new.rb +9 -0
data/spec/category_spec.rb +5 -1
data/spec/core/group_by_spec.rb +128 -0
data/spec/dataframe_spec.rb +125 -10
data/spec/extensions/which_dsl_spec.rb +38 -0
data/spec/fixtures/duplicates.csv +32 -0
data/spec/io/io_spec.rb +2 -2
data/spec/maths/arithmetic/vector_spec.rb +18 -0
data/spec/maths/statistics/vector_spec.rb +54 -38
data/spec/plotting/nyaplot/dataframe_spec.rb +23 -0
data/spec/spec_helper.rb +1 -1
data/spec/vector_spec.rb +39 -0
metadata +25 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 10338f8d554cc2c70b2dcc2d8fd029e73446f4de
-  data.tar.gz: b848b5923eebe90577ef93d4c9e988d7aa09fe9b
+  metadata.gz: 69452b32fd8ef0ef7fb4ed58ab53ffa8aa15806d
+  data.tar.gz: 56927c77adbe7941eb2ca9a5e44d705931aad237
 SHA512:
-  metadata.gz: ad6c0de4217a65a2c6245c4b969e98d970da47a08ce8128b060cd06a13ae415df6d5e7fdc7bba4c84115fc8559e80bea0a39c87efb1c3401df358f4df5d43117
-  data.tar.gz: 34abe4afd0c88c24d3d0bbd6f11df5efe554f50b0d3d045a8a969f4eae2232d8d1cfeead814e336da507ca94b3126447631699258fe1a5dc5ea736de77c587f8
+  metadata.gz: 8e7511133b3409f7821cfec944a950d53df57bcd5893bb8a9557c013f31bf1e4a9cc07bbe1c143c63684f00f7d8d8f1adf3b31df732508e667ba6677f47d1d96
+  data.tar.gz: fc4beb70106372a276b21e0da645951595e5674f56e4422752aeeabc9cc2156983add90e59486aea4d88386fbeb2896d15f7ede30667bc84027abd900ee42e0e

data/.github/ISSUE_TEMPLATE.md ADDED

@@ -0,0 +1,18 @@
+Heya! We are glad you are going to contribute to Daru by creating an issue, and kindly ask you to
+follow the simple rules:
+1. If it is a bug report, please provide **self-contained** Ruby code for reproducing the bug.
+  This means if Daru contributors just copy-paste the code from issue into `this-is-bug.rb` and run
+  `ruby this-is-bug.rb`, it will be reproduced. If the bug is hard to spot (e.g. it is not some
+  `NoMethodError`, but the differences in data structure), please show it with comment in code or
+  plain text in the issue.
+2. If it is a feature request, try to do the following (if possible):
+  * show how new feature will work with small code example;
+  * explain the use case (if it is not 200% obvious);
+  * if you are aware of it, show how it works in pandas and/or R.
+3. If it is just a question ("how to do this or that" or "why Daru does this or that") feel free to
+  write it in any form that is convenient to you, but remember code examples and use cases are always
+  welcome.
+Thanks! And please remove this text when finished with your issue description :)

data/.rubocop.yml CHANGED

@@ -12,6 +12,7 @@ AllCops:
     - 'vendor/**/*'
     - 'benchmarks/*'
     - 'profile/*'
+    - 'tmp/*'
   DisplayCopNames: true
   TargetRubyVersion: 2.0

data/.travis.yml CHANGED

@@ -9,12 +9,17 @@ rvm:
   - '2.4.0'
 matrix:
+  allow_failures:
+    - rvm: '2.0'
   fast_finish:
     true
 script:
+  - bundle add yard-junk
+  - bundle install
   - bundle exec rspec
   - bundle exec rubocop
+  - bundle exec yard-junk
 install:
   - gem install bundler

data/History.md CHANGED

@@ -1,3 +1,31 @@
+# 0.2.0 (31 October 2017)
+* Major Enhancements
+  - Add `DataFrame#which` query DSL (experimental! @rainchen)
+  - Add `DataFrame/Vector#rolling_fillna` (@baarkerlounger)
+  - Add `GroupBy#aggregate` (@shekharrajak)
+  - Add `DataFrame#uniq` (@baarkerlounger)
+* Minor Enhancements
+  - Allow `Vector#count` to be called without param for category type Vector (@rainchen)
+  - Add option to `DataFrame#vector_sum` to skip nils (@parthm)
+  - Add installation instructions to README.md (@koishimasato)
+  - Add release policy documentation (@baarkerlounger)
+  - Set index as DataFrame's default x axis for nyaplot (@matugm)
+* Fixes
+  - Fix `DataFrame/Vector#to_s` when name is a symbol (@baarkerlounger)
+  - Force `Vector#proportions` to return float (@rainchen)
+  - `DataFrame#new` creates empty DataFrame when given empty hash (@parthm)
+  - Remove unnecessary backports dependencies (@zverok)
+  - Specify minimum packable dependency (@zverok)
+  - Preserve key/column order when creating DataFrame from hash (@baarkerlounger)
+  - Fix `DataFrame#add_row` for DF with multi-index (@zverok)
+  - Fix `Vector#min`, `#max`, `#index_of_min`, `#index_of_max` (0.1.6 regression) (@athityakumar)
+  - Integrate yard-junk into CI (@rohitner)
+  - Remove Travis spec restriction (@zverok)
+  - Fix tuple sorting for DataFrames with nils (@baarkerlounger)
+  - Fix merge on index dropping default index (@rohitner)
 # 0.1.6 (04 August 2017)
 * Major Enhancements
   - Add support for reading HTML tables into DataFrames (@athityakumar)

data/README.md CHANGED

@@ -26,6 +26,12 @@ daru makes it easy and intuitive to process data predominantly through 2 data st
 * Quickly reducing data with pivot tables for quick data summary.
 * Import and export data from and to Excel, CSV, SQL Databases, ActiveRecord and plain text files.
+## Installation
+```console
+$ gem install daru
+```
 ## Notebooks
 #### Notebooks on most use cases

data/ReleasePolicy.md ADDED

@@ -0,0 +1,20 @@
+# Gem Release Policy
+Applicable to Daru > 0.1.6
+## Versioning
+Daru follows semantic versioning whereby the version number is always in the form MAJOR.MINOR.PATCH
+* Patch bump = Bug fixes
+* Minor bump = New features but backwards compatible
+* Major bump = API breaking changes
+For Major and Minor bumps release candidates should be released around 2 weeks prior to the bump and are indicated by MAJOR.MINOR.0.rc.
+For more information see the full semantic versioning specification at http://semver.org/.
+## Release Timing
+Patch releases should be done after every fix of a major bug (as tagged in the github issue tracker).
+Major releases should be kept to the minimum.

data/daru.gemspec CHANGED

@@ -52,6 +52,9 @@ EOF
   spec.add_runtime_dependency 'backports'
+  # it is required by NMatrix, yet we want to specify clearly which minimal version is OK
+  spec.add_runtime_dependency 'packable', '~> 1.3.9'
   spec.add_development_dependency 'spreadsheet', '~> 1.1.1'
   spec.add_development_dependency 'bundler', '~> 1.10'
   spec.add_development_dependency 'rake', '~>10.5'
@@ -75,6 +78,7 @@ EOF
   spec.add_development_dependency 'simplecov'
   spec.add_development_dependency 'gruff'
   spec.add_development_dependency 'webmock'
   if RUBY_VERSION < '2.1.0'
     spec.add_development_dependency 'nokogiri', '<= 1.6.8.1'
   else

data/lib/daru.rb CHANGED

@@ -105,6 +105,7 @@ require 'date'
 require 'daru/version.rb'
 require 'open-uri'
+require 'backports/2.1.0/array/to_h'
 require 'daru/index/index.rb'
 require 'daru/index/multi_index.rb'
@@ -124,5 +125,3 @@ require 'daru/core/merge.rb'
 require 'daru/date_time/offsets.rb'
 require 'daru/date_time/index.rb'
-require 'backports'

data/lib/daru/category.rb CHANGED

@@ -1,5 +1,7 @@
 module Daru
   module Category # rubocop:disable Metrics/ModuleLength
+    UNDEFINED = Object.new.freeze
     attr_accessor :base_category
     attr_reader :index, :coding_scheme, :name
@@ -113,7 +115,7 @@ module Daru
     end
     # Associates a category to the vector.
-    # @param [Array] *new_categories new categories to be associated
+    # @param [Array] new_categories new categories to be associated
     # @example
     #   dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
     #   dv.add_category :b
@@ -131,7 +133,10 @@ module Daru
     #   dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
     #   dv.count :a
     #   # => 2
-    def count category
+    #   dv.count
+    #   # => 5
+    def count category=UNDEFINED
+      return @cat_hash.values.map(&:size).inject(&:+) if category == UNDEFINED # count all
       raise ArgumentError, "Invalid category #{category}" unless
         categories.include?(category)
@@ -167,7 +172,7 @@ module Daru
     end
     # Returns vector for indexes/positions specified
-    # @param [Array] *indexes indexes/positions for which values has to be retrived
+    # @param [Array] indexes for which values have to be retrieved
     # @note Since it accepts both indexes and positions. In case of collision,
     #   argument will be treated as index
     # @return vector containing values specified at specified indexes/positions
@@ -196,7 +201,7 @@ module Daru
     end
     # Returns vector for positions specified.
-    # @param [Array] *positions positions at which values to be retrived.
+    # @param [Array] positions at which values are to be retrieved.
     # @return vector containing values specified at specified positions
     # @example
     #   dv = Daru::Vector.new [:a, 1, :a, 1, :c], type: :category
@@ -223,7 +228,7 @@ module Daru
     # Modifies values at specified indexes/positions.
     # @note In order to add a new category you need to associate it via #add_category
-    # @param [Array] *indexes indexes/positions at which to modify value
+    # @param [Array] indexes at which to modify value
     # @param [object] val value to assign at specific indexes/positions
     # @return modified vector
     # @example
@@ -584,7 +589,7 @@ module Daru
     alias :gteq :mteq
     # For querying the data
-    # @param [object] arel like query syntax
+    # @param bool_array [object] arel like query syntax
     # @return [Daru::Vector] Vector which makes the conditions true
     # @example
     #   dv = Daru::Vector.new ['I', 'II', 'I', 'III', 'I', 'II'],
@@ -658,7 +663,7 @@ module Daru
     end
     # Check if any one of mentioned values occur in the vector
-    # @param [Array] *values values to check for
+    # @param [Array] values to check for
     # @return [true, false] returns true if any one of specified values
     #   occur in the vector
     # @example
@@ -670,7 +675,7 @@ module Daru
     end
     # Return a vector with specified values removed
-    # @param [Array] *values values to reject from resultant vector
+    # @param [Array] values to reject from resultant vector
     # @return [Daru::Vector] vector with specified values removed
     # @example
     #   dv = Daru::Vector.new [1, 2, nil, Float::NAN], type: :category
@@ -689,7 +694,7 @@ module Daru
     end
     # Count the number of values specified
-    # @param [Array] *values values to count for
+    # @param [Array] values to count for
     # @return [Integer] the number of times the values mentioned occurs
     # @example
     #   dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
@@ -702,7 +707,7 @@ module Daru
     end
     # Return indexes of values specified
-    # @param [Array] *values values to find indexes for
+    # @param [Array] values to find indexes for
     # @return [Array] array of indexes of values specified
     # @example
     #   dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14

data/lib/daru/core/group_by.rb CHANGED

@@ -11,12 +11,14 @@ module Daru
         end
       end
-      TUPLE_SORTER = lambda do |a, b|
-        if a && b
-          a.compact <=> b.compact
-        else
-          a ? 1 : -1
-        end
+      # Comparator lambda for two group tuples, returning -1, 0 or 1.
+      # Either argument may be nil/false; nils inside a tuple are ignored.
+      TUPLE_SORTER = lambda do |left, right|
+        # The right-hand check runs first, so -1 is also the answer when both are missing.
+        return -1 unless right
+        return 1 unless left
+        # Compare only the non-nil members of each tuple.
+        left = left.compact
+        right = right.compact
+        # Array#<=> yields nil for incomparable members; treat that as a tie.
+        return left <=> right || 0 if left.length == right.length
+        # Different lengths after compacting: the shorter tuple sorts first.
+        left.length <=> right.length
+      end
       def initialize context, names
@@ -203,8 +205,8 @@ module Daru
       # Iteratively applies a function to the values in a group and accumulates the result.
       # @param init (nil) The initial value of the accumulator.
-      # @param block [Proc] A proc or lambda that accepts two arguments.  The first argument
-      #                     is the accumulated result.  The second argument is a DataFrame row.
+      # @yieldparam block [Proc] A proc or lambda that accepts two arguments.  The first argument
+      #                          is the accumulated result.  The second argument is a DataFrame row.
       # @example Usage of reduce
       #   df = Daru::DataFrame.new({
       #     a: ['a','b'] * 3,
@@ -243,6 +245,47 @@ module Daru
         @df.inspect
       end
+      # Function to use for aggregating the data.
+      # `group_by` is using Daru::DataFrame#aggregate
+      #
+      # @param options [Hash] options for column, you want in resultant dataframe
+      #
+      # @return [Daru::DataFrame]
+      #
+      # @example
+      #
+      #   df = Daru::DataFrame.new(
+      #     name: ['Ram','Krishna','Ram','Krishna','Krishna'],
+      #     visited: ['Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore'])
+      #
+      #   => #<Daru::DataFrame(5x2)>
+      #                   name   visited
+      #            0       Ram Hyderabad
+      #            1   Krishna     Delhi
+      #            2       Ram    Mumbai
+      #            3   Krishna    Raipur
+      #            4   Krishna  Banglore
+      #
+      #   df.group_by(:name)
+      #   => #<Daru::DataFrame(5x1)>
+      #                          visited
+      #      Krishna         1     Delhi
+      #                      3    Raipur
+      #                      4  Banglore
+      #          Ram         0 Hyderabad
+      #                      2    Mumbai
+      #
+      #   df.group_by(:name).aggregate(visited: -> (vec){vec.to_a.join(',')})
+      #   => #<Daru::DataFrame(2x1)>
+      #                  visited
+      #       Krishna Delhi,Raipur,Banglore
+      #           Ram Hyderabad,Mumbai
+      #
+      def aggregate(options={})
+        @df.index = @df.index.remove_layer(@df.index.levels.size - 1)
+        @df.aggregate(options)
+      end
       private
       def init_groups_df tuples, names

data/lib/daru/dataframe.rb CHANGED

@@ -84,7 +84,7 @@ module Daru
       # Read a dataframe from AR::Relation
       #
       # @param relation [ActiveRecord::Relation] An AR::Relation object from which data is loaded
-      # @params fields [Array] Field names to be loaded (optional)
+      # @param fields [Array] Field names to be loaded (optional)
       #
       # @return A dataframe containing the data loaded from the relation
       #
@@ -277,6 +277,17 @@ module Daru
     # Default to *true*.
     #
     # == Usage
+    #
+    #   df = Daru::DataFrame.new
+    #   # =>
+    #   # <Daru::DataFrame(0x0)>
+    #   # Creates an empty DataFrame with no rows or columns.
+    #
+    #   df = Daru::DataFrame.new({}, order: [:a, :b])
+    #   #<Daru::DataFrame(0x2)>
+    #     a   b
+    #   # Creates a DataFrame with no rows and columns :a and :b
+    #
     #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [6,7,8,9]}, order: [:b, :a],
     #     index: [:a, :b, :c, :d], name: :spider_man)
     #
@@ -329,7 +340,7 @@ module Daru
     #    #    1   4  14  44
     #    #    2   5  15  55
-    def initialize source, opts={} # rubocop:disable Metrics/MethodLength
+    def initialize source={}, opts={} # rubocop:disable Metrics/MethodLength
       vectors, index = opts[:order], opts[:index] # FIXME: just keyword arges after Ruby 2.1
       @data = []
       @name = opts[:name]
@@ -375,7 +386,7 @@ module Daru
     end
     # Retrive rows by positions
-    # @param [Array<Integer>] *positions positions of rows to retrive
+    # @param [Array<Integer>] positions of rows to retrieve
     # @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
     # @example
     #   df = Daru::DataFrame.new({
@@ -405,7 +416,7 @@ module Daru
     # Set rows by positions
     # @param [Array<Integer>] positions positions of rows to set
-    # @vector [Array, Daru::Vector] vector vector to be assigned
+    # @param [Array, Daru::Vector] vector vector to be assigned
     # @example
     #   df = Daru::DataFrame.new({
     #     a: [1, 2, 3],
@@ -438,7 +449,7 @@ module Daru
     end
     # Retrive vectors by positions
-    # @param [Array<Integer>] *positions positions of vectors to retrive
+    # @param [Array<Integer>] positions of vectors to retrieve
     # @return [Daru::Vector, Daru::DataFrame] vector for single position and dataframe for multiple positions
     # @example
     #   df = Daru::DataFrame.new({
@@ -522,7 +533,7 @@ module Daru
     end
     def add_row row, index=nil
-      self.row[index || @size] = row
+      self.row[*(index || @size)] = row
     end
     def add_vector n, vector
@@ -597,7 +608,7 @@ module Daru
     # Returns a dataframe in which rows with any of the mentioned values
     #   are ignored.
-    # @param [Array] *values values to reject to form the new dataframe
+    # @param [Array] values to reject to form the new dataframe
     # @return [Daru::DataFrame] Data Frame with only rows which doesn't
     #   contain the mentioned values
     # @example
@@ -650,6 +661,88 @@ module Daru
       self
     end
+    # Rolling fillna
+    # replace all Float::NAN and NIL values with the preceeding or following value
+    #
+    # @param direction [Symbol] (:forward, :backward) whether replacement value is preceeding or following
+    #
+    # @example
+    #   df = Daru::DataFrame.new({
+    #    a: [1,    2,          3,   nil,        Float::NAN, nil, 1,   7],
+    #    b: [:a,  :b,          nil, Float::NAN, nil,        3,   5,   nil],
+    #    c: ['a',  Float::NAN, 3,   4,          3,          5,   nil, 7]
+    #   })
+    #
+    #   => #<Daru::DataFrame(8x3)>
+    #        a   b   c
+    #    0   1   a   a
+    #    1   2   b NaN
+    #    2   3 nil   3
+    #    3 nil NaN   4
+    #    4 NaN nil   3
+    #    5 nil   3   5
+    #    6   1   5 nil
+    #    7   7 nil   7
+    #
+    #   2.3.3 :068 > df.rolling_fillna(:forward)
+    #   => #<Daru::DataFrame(8x3)>
+    #        a   b   c
+    #    0   1   a   a
+    #    1   2   b   a
+    #    2   3   b   3
+    #    3   3   b   4
+    #    4   3   b   3
+    #    5   3   3   5
+    #    6   1   5   5
+    #    7   7   5   7
+    #
+    def rolling_fillna!(direction=:forward)
+      @data.each { |vec| vec.rolling_fillna!(direction) }
+    end
+    def rolling_fillna(direction=:forward)
+      dup.rolling_fillna!(direction)
+    end
+    # Return unique rows by vector specified or all vectors
+    #
+    # @param vtrs [String][Symbol] vector names(s) that should be considered
+    #
+    # @example
+    #
+    #    => #<Daru::DataFrame(6x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #     2   3   c
+    #     3   4   f
+    #
+    #    2.3.3 :> df.unique
+    #    => #<Daru::DataFrame(5x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #     3   4   f
+    #
+    #    2.3.3 :> df.unique(:a)
+    #    => #<Daru::DataFrame(5x2)>
+    #         a   b
+    #     0   1   a
+    #     1   2   b
+    #     2   3   c
+    #     3   4   d
+    #
+    def uniq(*vtrs)
+      vecs = vtrs.empty? ? vectors.map(&:to_s) : Array(vtrs)
+      grouped = group_by(vecs)
+      indexes = grouped.groups.values.map { |v| v[0] }.sort
+      row[*indexes]
+    end
     # Iterate over each index of the DataFrame.
     def each_index &block
       return to_enum(:each_index) unless block_given?
@@ -1024,9 +1117,9 @@ module Daru
       dup.tap { |df| df.keep_vector_if(&block) }
     end
-    # Test each row with one or more tests. Each test is a Proc with the form
-    # *Proc.new {|row| row[:age] > 0}*
-    #
+    # Test each row with one or more tests.
+    # @param tests [Proc]  Each test is a Proc with the form
+    #                      *Proc.new {|row| row[:age] > 0}*
     # The function returns an array with all errors.
     #
     # FIXME: description here is too sparse. As far as I can get,
@@ -1128,7 +1221,7 @@ module Daru
     deprecate :flawed?, :include_values?, 2016, 10
     # Check if any of given values occur in the data frame
-    # @param [Array] *values values to check for
+    # @param [Array] values to check for
     # @return [true, false] true if any of the given values occur in the
     #   dataframe, false otherwise
     # @example
@@ -1259,13 +1352,60 @@ module Daru
     alias :last :tail
-    # Returns a vector with sum of all vectors specified in the argument.
-    # If vecs parameter is empty, sum all numeric vector.
-    def vector_sum vecs=nil
+    # Sum all numeric/specified vectors in the DataFrame.
+    #
+    # Returns a new vector containing the sum of all numeric
+    # or specified vectors of the DataFrame. By default, if the vector
+    # contains a nil, the sum is nil.
+    # With :skipnil argument set to true, nil values are assumed to be
+    # 0 (zero) and the sum vector is returned.
+    #
+    # @param args [Array] List of vectors to sum. Default is nil in which case
+    #   all numeric vectors are summed.
+    #
+    # @option opts [Boolean] :skipnil Consider nils as 0. Default is false.
+    #
+    # @return Vector with sum of all vectors specified in the argument.
+    #   If vecs parameter is empty, sum all numeric vector.
+    #
+    # @example
+    #    df = Daru::DataFrame.new({
+    #       a: [1, 2, nil],
+    #       b: [2, 1, 3],
+    #       c: [1, 1, 1]
+    #     })
+    #    => #<Daru::DataFrame(3x3)>
+    #           a   b   c
+    #       0   1   2   1
+    #       1   2   1   1
+    #       2 nil   3   1
+    #    df.vector_sum [:a, :c]
+    #    => #<Daru::Vector(3)>
+    #       0   2
+    #       1   3
+    #       2 nil
+    #    df.vector_sum
+    #    => #<Daru::Vector(3)>
+    #       0   4
+    #       1   4
+    #       2 nil
+    #    df.vector_sum skipnil: true
+    #    => #<Daru::Vector(3)>
+    #           c
+    #       0   4
+    #       1   4
+    #       2   4
+    #
+    def vector_sum(*args)
+      defaults = {vecs: nil, skipnil: false}
+      options = args.last.is_a?(::Hash) ? args.pop : {}
+      options = defaults.merge(options)
+      vecs = args[0] || options[:vecs]
+      skipnil = args[1] || options[:skipnil]
       vecs ||= numeric_vectors
       sum = Daru::Vector.new [0]*@size, index: @index, name: @name, dtype: @dtype
-      vecs.inject(sum) { |memo, n| memo + self[n] }
+      vecs.inject(sum) { |memo, n| self[n].add(memo, skipnil: skipnil) }
     end
     # Calculate mean of the rows of the dataframe.
@@ -1427,7 +1567,7 @@ module Daru
     # Reassign vectors with a new index of type Daru::Index or any of its subclasses.
     #
-    # @param [Daru::Index] idx The new index object on which the vectors are to
+    # @param new_index [Daru::Index] The new index object on which the vectors are to
     #   be indexed. Must of the same size as ncols.
     # @example Reassigning vectors of a DataFrame
     #   df = Daru::DataFrame.new({a: [1,2,3,4], b: [:a,:b,:c,:d], c: [11,22,33,44]})
@@ -1513,9 +1653,9 @@ module Daru
     # Sorts a dataframe (ascending/descending) in the given priority sequence of
     # vectors, with or without a block.
     #
-    # @param order [Array] The order of vector names in which the DataFrame
+    # @param vector_order [Array] The order of vector names in which the DataFrame
     #   should be sorted.
-    # @param [Hash] opts The options to sort with.
+    # @param opts [Hash] The options to sort with.
     # @option opts [TrueClass,FalseClass,Array] :ascending (true) Sort in ascending
     #   or descending order. Specify Array corresponding to *order* for multiple
     #   sort orders.
@@ -1684,12 +1824,11 @@ module Daru
       new_fields = (@vectors.to_a + other_df.vectors.to_a)
       new_fields = ArrayHelper.recode_repeated(new_fields)
       DataFrame.new({}, order: new_fields).tap do |df_new|
         (0...nrows).each do |i|
           df_new.add_row row[i].to_a + other_df.row[i].to_a
         end
+        df_new.index = @index if @index == other_df.index
         df_new.update
       end
     end
@@ -2035,7 +2174,7 @@ module Daru
     end
     # Converts the specified non category type vectors to category type vectors
-    # @param [Array] *names names of non category type vectors to be converted
+    # @param [Array] names of non category type vectors to be converted
     # @return [Daru::DataFrame] data frame in which specified vectors have been
     #   converted to category type
     # @example
@@ -2126,8 +2265,88 @@ module Daru
       res
     end
+    # Function to use for aggregating the data.
+    #
+    # @param options [Hash] options for column, you want in resultant dataframe
+    #
+    # @return [Daru::DataFrame]
+    #
+    # @example
+    #   df = Daru::DataFrame.new(
+    #      {col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]})
+    #   => #<Daru::DataFrame(5x2)>
+    #        col num
+    #      0   a  52
+    #      1   b  12
+    #      2   c   7
+    #      3   d  17
+    #      4   e   1
+    #
+    #    df.aggregate(num_100_times: ->(df) { df.num*100 })
+    #   => #<Daru::DataFrame(5x1)>
+    #               num_100_ti
+    #             0       5200
+    #             1       1200
+    #             2        700
+    #             3       1700
+    #             4        100
+    #
+    #   When we have duplicate index :
+    #
+    #   idx = Daru::CategoricalIndex.new [:a, :b, :a, :a, :c]
+    #   df = Daru::DataFrame.new({num: [52,12,07,17,01]}, index: idx)
+    #   => #<Daru::DataFrame(5x1)>
+    #        num
+    #      a  52
+    #      b  12
+    #      a   7
+    #      a  17
+    #      c   1
+    #
+    #   df.aggregate(num: :mean)
+    #   => #<Daru::DataFrame(3x1)>
+    #                      num
+    #             a 25.3333333
+    #             b         12
+    #             c          1
+    #
+    # Note: `GroupBy` class `aggregate` method uses this `aggregate` method
+    # internally.
+    def aggregate(options={})
+      colmn_value, index_tuples = aggregated_colmn_value(options)
+      Daru::DataFrame.new(
+        colmn_value, index: index_tuples, order: options.keys
+      )
+    end
     private
+    # Do the `method` (`method` can be :sum, :mean, :std, :median, etc or
+    # lambda), on the column.
+    def apply_method_on_colmns colmn, index_tuples, method
+      rows = []
+      index_tuples.each do |indexes|
+        # If single element then also make it vector.
+        slice = Daru::Vector.new(Array(self[colmn][*indexes]))
+        case method
+        when Symbol
+          rows << (slice.is_a?(Daru::Vector) ? slice.send(method) : slice)
+        when Proc
+          rows << method.call(slice)
+        end
+      end
+      rows
+    end
+    def apply_method_on_df index_tuples, method
+      rows = []
+      index_tuples.each do |indexes|
+        slice = row[*indexes]
+        rows << method.call(slice)
+      end
+      rows
+    end
     def headers
       Daru::Index.new(Array(index.name) + @vectors.to_a)
     end
@@ -2224,9 +2443,7 @@ module Daru
         rescue IndexError
           raise IndexError, "Specified vector #{names.first} does not exist"
         end
         return @data[pos] if pos.is_a?(Numeric)
         names = pos
       end
@@ -2396,7 +2613,7 @@ module Daru
     end
     def create_vectors_index_with vectors, source
-      vectors = source.keys.sort_by(&:to_s) if vectors.nil?
+      vectors = source.keys if vectors.nil?
       @vectors =
         if vectors.is_a?(Index) || vectors.is_a?(MultiIndex)
@@ -2443,9 +2660,7 @@ module Daru
       @index   = Index.coerce(index || source[0].size)
       @vectors = Index.coerce(vectors)
-      @data = @vectors.each_with_index.map do |_vec,idx|
-        Daru::Vector.new(source[idx], index: @index, name: vectors[idx])
-      end
+      update_data source, vectors
     end
     def initialize_from_array_of_vectors source, vectors, index, opts
@@ -2694,6 +2909,30 @@ module Daru
       end
     end
+    def update_data source, vectors
+      @data = @vectors.each_with_index.map do |_vec,idx|
+        Daru::Vector.new(source[idx], index: @index, name: vectors[idx])
+      end
+    end
+    def aggregated_colmn_value(options)
+      colmn_value = []
+      index_tuples = Array(@index).uniq
+      options.keys.each do |vec|
+        do_this_on_vec = options[vec]
+        colmn_value << if @vectors.include?(vec)
+                         apply_method_on_colmns(
+                           vec, index_tuples, do_this_on_vec
+                         )
+                       else
+                         apply_method_on_df(
+                           index_tuples, do_this_on_vec
+                         )
+                       end
+      end
+      [colmn_value, index_tuples]
+    end
     # coerce ranges, integers and array in appropriate ways
     def coerce_positions *positions, size
       if positions.size == 1