daru 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,6 +21,8 @@ module Daru
21
21
  # df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
22
22
  # df.plot type: :bar, x: :a, y: :b
23
23
  def plot opts={}, &block
24
+ index_as_default_x_axis(opts) unless x_axis_defined?(opts)
25
+
24
26
  if opts[:categorized]
25
27
  plot_with_category(opts, &block)
26
28
  else
@@ -30,6 +32,15 @@ module Daru
30
32
 
31
33
  private
32
34
 
35
# Tells whether the plotting options already name an x axis.
#
# An axis counts as defined when +opts[:x]+ is truthy, or when some key is
# exactly "x" followed by digits (+:x1+, +:x2+, ...). The pattern is anchored
# so that keys which merely contain such a fragment (for example +:max10+)
# are not mistaken for an axis.
#
# @param opts [Hash] plotting options
# @return [Boolean] true when an x axis is present in +opts+
def x_axis_defined?(opts)
  return true if opts[:x]

  opts.keys.any? { |key| key.to_s =~ /\Ax\d+\z/ }
end
38
+
39
# Points the plot at the receiver's own index when no x axis was requested.
#
# Sets +opts[:x]+ to +:_index+ (the hash passed in is modified in place) and
# stores the index values on the receiver under the +:_index+ key.
#
# @param opts [Hash] plotting options; mutated
# @return [Array] the index values that were stored
def index_as_default_x_axis(opts)
  opts[:x] = :_index
  index_values = @index.to_a
  self[:_index] = index_values
end
43
+
33
44
  def plot_without_category opts
34
45
  options = {type: :scatter}.merge(opts)
35
46
 
@@ -228,7 +228,7 @@ module Daru
228
228
  end
229
229
 
230
230
  # Returns vector of values given positional values
231
- # @param [Array<object>] *positions positional values
231
+ # @param positions [Array<object>] positional values
232
232
  # @return [object] vector
233
233
  # @example
234
234
  # dv = Daru::Vector.new 'a'..'e'
@@ -252,7 +252,7 @@ module Daru
252
252
  end
253
253
 
254
254
  # Change value at given positions
255
- # @param [Array<object>] *positions positional values
255
+ # @param positions [Array<object>] positional values
256
256
  # @param [object] val value to assign
257
257
  # @example
258
258
  # dv = Daru::Vector.new 'a'..'e'
@@ -385,11 +385,11 @@ module Daru
385
385
  # comparator methods to obtain meaningful results. See this notebook for
386
386
  # a good overview of using #where.
387
387
  #
388
- # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
388
+ # @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
389
389
  # collection containing the true or false values. Each element in the Vector
390
390
  # corresponding to a `true` in the bool_array will be returned along with its
391
391
  # index.
392
- # @exmaple Usage of #where.
392
+ # @example Usage of #where.
393
393
  # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
394
394
  #
395
395
  # # Simple logic statement passed to #where.
@@ -451,7 +451,7 @@ module Daru
451
451
  deprecate :flawed?, :include_values?, 2016, 10
452
452
 
453
453
  # Check if any one of mentioned values occur in the vector
454
- # @param [Array] *values values to check for
454
+ # @param values [Array] values to check for
455
455
  # @return [true, false] returns true if any one of specified values
456
456
  # occur in the vector
457
457
  # @example
@@ -467,7 +467,7 @@ module Daru
467
467
  # Return vector of booleans with value at ith position is either
468
468
  # true or false depending upon whether value at position i is equal to
469
469
  # any of the values passed in the argument or not
470
- # @param [Array] *values values to equate with
470
+ # @param values [Array] values to equate with
471
471
  # @return [Daru::Vector] vector of boolean values
472
472
  # @example
473
473
  # dv = Daru::Vector.new [1, 2, 3, 2, 1]
@@ -760,6 +760,43 @@ module Daru
760
760
  self
761
761
  end
762
762
 
763
# Rolling fillna
# Replaces, in place, every missing value (nil or Float::NAN) with the nearest
# valid value that precedes it (:forward) or follows it (any other direction,
# conventionally :backward).
#
# A missing value with no valid neighbour in the chosen direction (for example
# a leading nil when filling forward) is replaced with 0.
#
# @param direction [Symbol] (:forward, :backward) whether the replacement
#   value is the preceding or the following one
# @return [Daru::Vector] self, so the call can be chained
#
# @example
#   dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
#
#   dv.rolling_fillna!(:forward)
#   # => #<Daru::Vector(9)>
#   #      0   1
#   #      1   2
#   #      2   1
#   #      3   4
#   #      4   4
#   #      5   4
#   #      6   3
#   #      7   3
#   #      8   3
#
def rolling_fillna!(direction=:forward)
  enum = direction == :forward ? index : index.reverse_each
  last_valid_value = 0
  enum.each do |idx|
    if valid_value?(self[idx])
      last_valid_value = self[idx]
    else
      self[idx] = last_valid_value
    end
  end
  # Return the receiver rather than the result of +each+ (the index or an
  # enumerator), so the non-destructive wrapper and chained calls get the vector.
  self
end
794
+
795
# Non-destructive version of rolling_fillna!
#
# Fills a duplicate of the receiver and always returns that duplicate,
# regardless of what rolling_fillna! itself returns. The receiver is left
# untouched as far as +dup+ isolates it.
#
# @param direction [Symbol] passed through to rolling_fillna!
# @return [Object] the filled duplicate of the receiver
def rolling_fillna(direction=:forward)
  dup.tap { |copy| copy.rolling_fillna!(direction) }
end
799
+
763
800
  # Lags the series by `k` periods.
764
801
  #
765
802
  # Lags the series by `k` periods, "shifting" data and inserting `nil`s
@@ -818,7 +855,7 @@ module Daru
818
855
  deprecate :n_valid, :count_values, 2016, 10
819
856
 
820
857
  # Count the number of values specified
821
- # @param [Array] *values values to count for
858
+ # @param values [Array] values to count for
822
859
  # @return [Integer] the number of times the values mentioned occurs
823
860
  # @example
824
861
  # dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
@@ -942,7 +979,7 @@ module Daru
942
979
  end
943
980
 
944
981
  # Create a summary of the Vector
945
- # @params [Fixnum] indent_level
982
+ # @param indent_level [Fixnum] indent level
946
983
  # @return [String] String containing the summary of the Vector
947
984
  # @example
948
985
  # dv = Daru::Vector.new [1, 2, 3]
@@ -1182,10 +1219,10 @@ module Daru
1182
1219
  #
1183
1220
  # == Arguments
1184
1221
  #
1185
- # @as_a [Symbol] Passing :array will return only the elements
1222
+ # @param as_a [Symbol] Passing :array will return only the elements
1186
1223
  # as an Array. Otherwise will return a Daru::Vector.
1187
1224
  #
1188
- # @duplicate [Symbol] In case no missing data is found in the
1225
+ # @param _duplicate [Symbol] In case no missing data is found in the
1189
1226
  # vector, setting this to false will return the same vector.
1190
1227
  # Otherwise, a duplicate will be returned irrespective of
1191
1228
  # presence of missing data.
@@ -1207,7 +1244,7 @@ module Daru
1207
1244
  deprecate :only_valid, :reject_values, 2016, 10
1208
1245
 
1209
1246
  # Return a vector with specified values removed
1210
- # @param [Array] *values values to reject from resultant vector
1247
+ # @param values [Array] values to reject from resultant vector
1211
1248
  # @return [Daru::Vector] vector with specified values removed
1212
1249
  # @example
1213
1250
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN]
@@ -1229,7 +1266,7 @@ module Daru
1229
1266
  end
1230
1267
 
1231
1268
  # Return indexes of values specified
1232
- # @param [Array] *values values to find indexes for
1269
+ # @param values [Array] values to find indexes for
1233
1270
  # @return [Array] array of indexes of values specified
1234
1271
  # @example
1235
1272
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
@@ -1437,6 +1474,11 @@ module Daru
1437
1474
  end
1438
1475
  end
1439
1476
 
1477
# Helper method: reports whether an arbitrary value counts as present.
# nil, and anything that responds to #nan? with a truthy answer
# (e.g. Float::NAN), is treated as missing.
#
# @param v [Object] value to inspect
# @return [Boolean] false for nil/NaN, true otherwise
def valid_value?(v)
  return false if v.nil?

  !(v.respond_to?(:nan?) && v.nan?)
end
1481
+
1440
1482
  def initialize_vector source, opts
1441
1483
  index, source = parse_source(source, opts)
1442
1484
  set_name opts[:name]
@@ -1502,7 +1544,7 @@ module Daru
1502
1544
  end
1503
1545
 
1504
1546
  # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
1505
- # @dtype variable is set and the underlying data type of vector changed.
1547
+ # instance variable @dtype is set and the underlying data type of vector changed.
1506
1548
  def cast_vector_to dtype, source=nil, nm_dtype=nil
1507
1549
  source = @data.to_a if source.nil?
1508
1550
 
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.1.6'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -0,0 +1,9 @@
1
+ require_relative '_base'
2
+
3
# Benchmark input: 400,000 elements, each labelled with its number as a String.
# (Previously written as 40_0000, which is the same value with misleading
# digit grouping.)
n = 400_000
idx = (1..n).map(&:to_s)

# `__profile__` is expected to come from `_base` (not shown here); the block
# builds a Vector over the large String index prepared above.
__profile__ do
  Daru::Vector.new(1..n, index: idx)
end
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
1596
1596
  Daru::DataFrame.new({
1597
1597
  a: [1, 2, 3, 4, 5],
1598
1598
  b: ['first', 'second', 'first', 'second', 'third'],
1599
- c: ['a', 'b', 'a', 'b', 'c']
1599
+ c: ['a', 'b', 'a', 'b', nil]
1600
1600
  })
1601
1601
  end
1602
1602
  before { df.to_category :b, :c }
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
1605
1605
  it { is_expected.to be_a Daru::DataFrame }
1606
1606
  its(:'b.type') { is_expected.to eq :category }
1607
1607
  its(:'c.type') { is_expected.to eq :category }
1608
+ its(:'a.count') { is_expected.to eq 5 }
1609
+ its(:'c.count') { is_expected.to eq 5 }
1610
+ it { expect(df.c.count('a')).to eq 2 }
1611
+ it { expect(df.c.count(nil)).to eq 1 }
1608
1612
  end
1609
1613
 
1610
1614
  context "#interact_code" do
@@ -465,4 +465,132 @@ describe Daru::Core::GroupBy do
465
465
 
466
466
  it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
467
467
  end
468
+
469
context 'when dataframe tuples contain nils in mismatching positions' do
  # Three columns whose nils fall on different rows.
  let(:df) do
    Daru::DataFrame.new(
      {
        'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
        'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
        'num' => [1, nil, 3, 4, 5, 6, 7, nil]
      }
    )
  end

  # A bare `raise_error` is used on purpose: negating one specific error class
  # would also pass when any *other* error is raised, hiding real failures.
  it 'groups by without errors' do
    expect { df.group_by(df.vectors.map(&:to_s)) }.not_to raise_error
  end
end
485
+
486
context '#aggregate' do
  # Two months of salaries for three employees.
  let(:dataframe) do
    Daru::DataFrame.new({
      employee: %w[John Jane Mark John Jane Mark],
      month: %w[June June June July July July],
      salary: [1000, 500, 700, 1200, 600, 600]
    })
  end

  context 'group and aggregate sum for particular single vector' do
    subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }

    it do
      expected = Daru::DataFrame.new(
        {salary: [1100, 2200, 1300]},
        index: ['Jane', 'John', 'Mark']
      )

      is_expected.to eq expected
    end
  end

  context 'group and aggregate sum for two vectors' do
    subject { dataframe.group_by([:employee, :month]).aggregate(salary: :sum) }

    it do
      employee_months = Daru::MultiIndex.from_tuples([
        ['Jane', 'July'],
        ['Jane', 'June'],
        ['John', 'July'],
        ['John', 'June'],
        ['Mark', 'July'],
        ['Mark', 'June']
      ])
      expected = Daru::DataFrame.new(
        {salary: [600, 500, 1200, 1000, 600, 700]},
        index: employee_months
      )

      is_expected.to eq expected
    end
  end

  context 'group and aggregate sum and lambda function for vectors' do
    subject do
      dataframe.group_by([:employee]).aggregate(
        salary: :sum,
        month: ->(vec) { vec.to_a.join('/') }
      )
    end

    it do
      expected = Daru::DataFrame.new(
        {
          salary: [1100, 2200, 1300],
          month: ['June/July', 'June/July', 'June/July']
        },
        index: ['Jane', 'John', 'Mark'],
        order: [:salary, :month]
      )

      is_expected.to eq expected
    end
  end

  context 'group and aggregate sum and lambda functions on dataframe' do
    subject do
      dataframe.group_by([:employee]).aggregate(
        salary: :sum,
        month: ->(vec) { vec.to_a.join('/') },
        mean_salary: ->(df) { df.salary.mean },
        periods: ->(df) { df.size }
      )
    end

    it do
      expected = Daru::DataFrame.new(
        {
          salary: [1100, 2200, 1300],
          month: ['June/July', 'June/July', 'June/July'],
          mean_salary: [550.0, 1100.0, 650.0],
          periods: [2, 2, 2]
        },
        index: ['Jane', 'John', 'Mark'],
        order: [:salary, :month, :mean_salary, :periods]
      )

      is_expected.to eq expected
    end
  end

  context 'group_by and aggregate on mixed MultiIndex' do
    # Every name occurs more than once.
    let(:df) do
      Daru::DataFrame.new(
        name: ['Ram','Krishna','Ram','Krishna','Krishna'],
        visited: ['Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
      )
    end

    # 'Ram' occurs only once here.
    let(:df_mixed) do
      Daru::DataFrame.new(
        name: ['Krishna','Ram','Krishna','Krishna'],
        visited: ['Delhi', 'Mumbai', 'Raipur', 'Banglore']
      )
    end

    it 'group_by' do
      grouped_index = Daru::MultiIndex.from_tuples(
        [['Krishna', 1], ['Krishna', 3], ['Krishna', 4], ['Ram', 0], ['Ram', 2]]
      )
      expected = Daru::DataFrame.new(
        {visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
        index: grouped_index
      )

      expect(df.group_by(:name).df).to eq(expected)
    end

    it 'group_by and aggregate' do
      aggregated = df.group_by(:name).aggregate(
        visited: ->(vec) { vec.to_a.join(',') }
      )
      expected = Daru::DataFrame.new(
        {visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
        index: ['Krishna', 'Ram']
      )

      expect(aggregated).to eq(expected)
    end

    it 'group_by and aggregate when anyone index is not multiple times' do
      aggregated = df_mixed.group_by(:name).aggregate(
        visited: ->(vec) { vec.to_a.join(',') }
      )
      expected = Daru::DataFrame.new(
        {visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
        index: ['Krishna', 'Ram']
      )

      expect(aggregated).to eq(expected)
    end
  end
end
468
596
  end
@@ -145,8 +145,15 @@ describe Daru::DataFrame do
145
145
  end
146
146
 
147
147
  context "#initialize" do
148
+
149
# A DataFrame built without any arguments has neither rows nor columns.
it "initializes an empty DataFrame with no arguments" do
  blank_frame = Daru::DataFrame.new

  expect(blank_frame.nrows).to eq 0
  expect(blank_frame.ncols).to eq 0
end
154
+
148
155
  context Daru::Index do
149
- it "initializes an empty DataFrame" do
156
+ it "initializes an empty DataFrame with empty source arg" do
150
157
  df = Daru::DataFrame.new({}, order: [:a, :b])
151
158
 
152
159
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
@@ -164,6 +171,13 @@ describe Daru::DataFrame do
164
171
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
165
172
  end
166
173
 
174
# Vector order follows the insertion order of the source Hash (:b before :a).
it "initializes from a Hash and preserves default order" do
  row_labels = [:one, :two, :three, :four, :five]
  source = {b: [11,12,13,14,15], a: [1,2,3,4,5]}
  frame = Daru::DataFrame.new(source, index: row_labels)

  expect(frame.vectors).to eq(Daru::Index.new([:b, :a]))
end
180
+
167
181
  it "initializes from a Hash of Vectors" do
168
182
  va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
169
183
  vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
@@ -221,7 +235,7 @@ describe Daru::DataFrame do
221
235
  df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
222
236
 
223
237
  expect(df.index) .to eq(Daru::Index.new [0,1,2,3,4])
224
- expect(df.vectors).to eq(Daru::Index.new [:a, :b])
238
+ expect(df.vectors).to eq(Daru::Index.new [:b, :a])
225
239
  end
226
240
 
227
241
  it "aligns indexes properly" do
@@ -1573,6 +1587,22 @@ describe Daru::DataFrame do
1573
1587
  }
1574
1588
  end
1575
1589
 
1590
context 'with mulitiindex DF' do
  # Three-row frame keyed by a two-level index.
  subject(:data_frame) do
    Daru::DataFrame.new(
      {b: [11,12,13], a: [1,2,3], c: [11,22,33]},
      order: [:a, :b, :c],
      index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]])
    )
  end

  before { data_frame.add_row [100,200,300], [:two, :five] }

  # The new row is appended under the [:two, :five] tuple.
  it do
    expected_index = Daru::MultiIndex.from_tuples(
      [[:one, :two], [:one, :three], [:two, :four], [:two, :five]]
    )
    expected = Daru::DataFrame.new(
      {b: [11,12,13,200], a: [1,2,3,100], c: [11,22,33,300]},
      order: [:a, :b, :c],
      index: expected_index
    )

    is_expected.to eq(expected)
  end
end
1605
+
1576
1606
  it "allows adding rows after making empty DF by specfying only order" do
1577
1607
  df = Daru::DataFrame.new({}, order: [:a, :b, :c])
1578
1608
  df.add_row [1,2,3]
@@ -1790,6 +1820,59 @@ describe Daru::DataFrame do
1790
1820
  end
1791
1821
  end
1792
1822
 
1823
describe 'uniq' do
  # Fixture loaded from the duplicates CSV; expected row counts below are
  # tied to that file's contents.
  let(:df) { Daru::DataFrame.from_csv 'spec/fixtures/duplicates.csv' }

  context 'with no args' do
    it do
      row_count = df.uniq.shape.first
      expect(row_count).to eq 30
    end
  end

  context 'given a vector' do
    it do
      row_count = df.uniq("color").shape.first
      expect(row_count).to eq 2
    end
  end

  context 'given an array of vectors' do
    it do
      row_count = df.uniq("color", "director_name").shape.first
      expect(row_count).to eq 29
    end
  end
end
1849
+
1850
context '#rolling_fillna!' do
  subject do
    Daru::DataFrame.new({
      a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
      b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
      c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
    })
  end

  # Expected column contents per fill direction. The trailing 0 in :b for
  # :backward is what the fill produces when nothing valid follows.
  expected_columns = {
    forward: {
      a: [1, 2, 3, 3, 3, 3, 1, 7],
      b: [:a, :b, :b, :b, :b, 3, 5, 5],
      c: ['a', 'a', 3, 4, 3, 5, 5, 7]
    },
    backward: {
      a: [1, 2, 3, 1, 1, 1, 1, 7],
      b: [:a, :b, 3, 3, 3, 3, 5, 0],
      c: ['a', 3, 3, 4, 3, 5, 7, 7]
    }
  }

  expected_columns.each do |direction, columns|
    context "rolling_fillna! #{direction}s" do
      before { subject.rolling_fillna!(direction) }

      it { is_expected.to be_a Daru::DataFrame }

      columns.each do |name, values|
        its(:"#{name}.to_a") { is_expected.to eq values }
      end
    end
  end
end
1875
+
1793
1876
  context "#clone" do
1794
1877
  it "returns a view of the whole dataframe" do
1795
1878
  cloned = @data_frame.clone
@@ -3160,6 +3243,15 @@ describe Daru::DataFrame do
3160
3243
  it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3161
3244
  it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3162
3245
  end
3246
+
3247
context "preserves indices for dataframes with same index" do
  # Both frames are built on the same three labels.
  let(:index) { ['one','two','three'] }
  let(:df1) do
    Daru::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index)
  end
  let(:df2) do
    Daru::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index)
  end

  subject { df1.merge(df2) }

  its(:index) do
    shared_index = Daru::Index.new(index)
    is_expected.to eq shared_index
  end
end
3163
3255
  end
3164
3256
 
3165
3257
  context "#vector_by_calculation" do
@@ -3176,23 +3268,27 @@ describe Daru::DataFrame do
3176
3268
 
3177
3269
  context "#vector_sum" do
3178
3270
  before do
3179
- a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
3180
- a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
3181
- b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
3182
- b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
3271
+ a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil, nil]
3272
+ a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30, nil]
3273
+ b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3274
+ b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3, nil]
3183
3275
  @df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3184
3276
  end
3185
3277
 
3186
3278
  it "calculates complete vector sum" do
3187
- expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
3279
+ expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3280
+ end
3281
+
3282
+ it "ignores nils if skipnil is true" do
3283
+ expect(@df.vector_sum skipnil: true).to eq(Daru::Vector.new [13, 15, 26, 25, 28, 35, 0])
3188
3284
  end
3189
3285
 
3190
3286
  it "calculates partial vector sum" do
3191
3287
  a = @df.vector_sum([:a1, :a2])
3192
3288
  b = @df.vector_sum([:b1, :b2])
3193
3289
 
3194
- expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
3195
- expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
3290
+ expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil, nil])
3291
+ expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3196
3292
  end
3197
3293
  end
3198
3294
 
@@ -3380,7 +3476,8 @@ describe Daru::DataFrame do
3380
3476
  ev_b = Daru::Vector.new [1, 1, 0]
3381
3477
  ev_c = Daru::Vector.new [0, 1, 1]
3382
3478
  df2 = Daru::DataFrame.new({
3383
- :_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c })
3479
+ :_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
3480
+ order: ['a', 'b', 'c', :_id])
3384
3481
 
3385
3482
  expect(df2).to eq(df)
3386
3483
  end
@@ -3928,6 +4025,24 @@ describe Daru::DataFrame do
3928
4025
  end
3929
4026
  end
3930
4027
 
4028
context '#aggregate' do
  let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }

  # Plain decimal literals: the former 07 / 01 were octal literals that only
  # happened to equal 7 and 1.
  # NOTE(review): `cat_index:` sits inside the single source hash here, so it
  # looks like it becomes a column rather than the frame's index — confirm
  # this is intended.
  let(:df) { Daru::DataFrame.new(num: [52, 12, 7, 17, 1], cat_index: cat_idx) }
  let(:df_cat_idx) do
    Daru::DataFrame.new({num: [52, 12, 7, 17, 1]}, index: cat_idx)
  end

  it 'lambda function on particular column' do
    expect(df.aggregate(num_100_times: ->(df) { df.num*100 })).to eq(
      Daru::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
    )
  end

  it 'aggregate sum on particular column' do
    expect(df_cat_idx.aggregate(num: :sum)).to eq(
      Daru::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
    )
  end
end
4045
+
3931
4046
  context '#create_sql' do
3932
4047
  let(:df) { Daru::DataFrame.new({
3933
4048
  a: [1,2,3],