daru 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,8 @@ module Daru
21
21
  # df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
22
22
  # df.plot type: :bar, x: :a, y: :b
23
23
  def plot opts={}, &block
24
+ index_as_default_x_axis(opts) unless x_axis_defined?(opts)
25
+
24
26
  if opts[:categorized]
25
27
  plot_with_category(opts, &block)
26
28
  else
@@ -30,6 +32,15 @@ module Daru
30
32
 
31
33
  private
32
34
 
35
# Tells whether the plot options already name an x axis, either through the
# plain :x key or through a numbered key made of "x" followed only by digits
# (for example :x1 or :x2).
#
# The pattern is anchored on both ends so that unrelated keys which merely
# contain "x<digit>" somewhere (such as :box1) are not mistaken for an axis.
#
# @param opts [Hash] plotting options
# @return [Boolean] true when an x axis is present in +opts+
def x_axis_defined?(opts)
  return true if opts[:x]

  opts.keys.any? { |key| key.to_s =~ /\Ax\d+\z/ }
end
38
+
39
# Falls back to the receiver's index as the x axis of a plot.
#
# Writes :_index under the :x key of the given options and stores the
# contents of @index (as an Array) in the receiver under :_index.
#
# @param opts [Hash] plotting options; modified in place
# @return [Array] the index values that were stored under :_index
def index_as_default_x_axis(opts)
  opts.store(:x, :_index)
  index_values = @index.to_a
  self[:_index] = index_values
end
43
+
33
44
  def plot_without_category opts
34
45
  options = {type: :scatter}.merge(opts)
35
46
 
@@ -228,7 +228,7 @@ module Daru
228
228
  end
229
229
 
230
230
  # Returns vector of values given positional values
231
- # @param [Array<object>] *positions positional values
231
+ # @param positions [Array<object>] positional values
232
232
  # @return [object] vector
233
233
  # @example
234
234
  # dv = Daru::Vector.new 'a'..'e'
@@ -252,7 +252,7 @@ module Daru
252
252
  end
253
253
 
254
254
  # Change value at given positions
255
- # @param [Array<object>] *positions positional values
255
+ # @param positions [Array<object>] positional values
256
256
  # @param [object] val value to assign
257
257
  # @example
258
258
  # dv = Daru::Vector.new 'a'..'e'
@@ -385,11 +385,11 @@ module Daru
385
385
  # comparator methods to obtain meaningful results. See this notebook for
386
386
  # a good overview of using #where.
387
387
  #
388
- # @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] bool_arry The
388
+ # @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
389
389
  # collection containing the true or false values. Each element in the Vector
390
390
  # corresponding to a `true` in the bool_array will be returned along with its
391
391
  # index.
392
- # @exmaple Usage of #where.
392
+ # @example Usage of #where.
393
393
  # vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
394
394
  #
395
395
  # # Simple logic statement passed to #where.
@@ -451,7 +451,7 @@ module Daru
451
451
  deprecate :flawed?, :include_values?, 2016, 10
452
452
 
453
453
  # Check if any one of mentioned values occur in the vector
454
- # @param [Array] *values values to check for
454
+ # @param values [Array] values to check for
455
455
  # @return [true, false] returns true if any one of specified values
456
456
  # occur in the vector
457
457
  # @example
@@ -467,7 +467,7 @@ module Daru
467
467
  # Return vector of booleans with value at ith position is either
468
468
  # true or false depending upon whether value at position i is equal to
469
469
  # any of the values passed in the argument or not
470
- # @param [Array] *values values to equate with
470
+ # @param values [Array] values to equate with
471
471
  # @return [Daru::Vector] vector of boolean values
472
472
  # @example
473
473
  # dv = Daru::Vector.new [1, 2, 3, 2, 1]
@@ -760,6 +760,43 @@ module Daru
760
760
  self
761
761
  end
762
762
 
763
# Rolling fillna
# Replaces, in place, every value rejected by valid_value? (nil and
# Float::NAN) with the closest valid value met earlier while walking the
# index: the preceding value for :forward, the following value otherwise.
#
# Missing values met before any valid value has been seen are filled with 0.
#
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
# @return [self] the receiver, so calls can be chained
#
# @example
#   dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
#
#   dv.rolling_fillna!(:forward)
#   # => #<Daru::Vector(9)>
#   #   0   1
#   #   1   2
#   #   2   1
#   #   3   4
#   #   4   4
#   #   5   4
#   #   6   3
#   #   7   3
#   #   8   3
#
def rolling_fillna!(direction=:forward)
  enum = direction == :forward ? index : index.reverse_each
  # Seed used for missing values that come before the first valid one.
  last_valid_value = 0
  enum.each do |idx|
    value = self[idx]
    if valid_value?(value)
      last_valid_value = value
    else
      self[idx] = last_valid_value
    end
  end
  # Return the receiver rather than whatever the enumeration returns.
  self
end
794
+
795
# Non-destructive version of rolling_fillna!
#
# Runs rolling_fillna! on a +dup+ of the receiver and hands that duplicate
# back.
#
# @param direction [Symbol] passed through unchanged to rolling_fillna!
# @return [Object] the filled duplicate of the receiver
def rolling_fillna(direction=:forward)
  # tap yields the duplicate and returns it, so the result does not depend
  # on what rolling_fillna! itself returns.
  dup.tap { |copy| copy.rolling_fillna!(direction) }
end
799
+
763
800
  # Lags the series by `k` periods.
764
801
  #
765
802
  # Lags the series by `k` periods, "shifting" data and inserting `nil`s
@@ -818,7 +855,7 @@ module Daru
818
855
  deprecate :n_valid, :count_values, 2016, 10
819
856
 
820
857
  # Count the number of values specified
821
- # @param [Array] *values values to count for
858
+ # @param values [Array] values to count for
822
859
  # @return [Integer] the number of times the values mentioned occurs
823
860
  # @example
824
861
  # dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
@@ -942,7 +979,7 @@ module Daru
942
979
  end
943
980
 
944
981
  # Create a summary of the Vector
945
- # @params [Fixnum] indent_level
982
+ # @param indent_level [Fixnum] indent level
946
983
  # @return [String] String containing the summary of the Vector
947
984
  # @example
948
985
  # dv = Daru::Vector.new [1, 2, 3]
@@ -1182,10 +1219,10 @@ module Daru
1182
1219
  #
1183
1220
  # == Arguments
1184
1221
  #
1185
- # @as_a [Symbol] Passing :array will return only the elements
1222
+ # @param as_a [Symbol] Passing :array will return only the elements
1186
1223
  # as an Array. Otherwise will return a Daru::Vector.
1187
1224
  #
1188
- # @duplicate [Symbol] In case no missing data is found in the
1225
+ # @param _duplicate [Symbol] In case no missing data is found in the
1189
1226
  # vector, setting this to false will return the same vector.
1190
1227
  # Otherwise, a duplicate will be returned irrespective of
1191
1228
  # presence of missing data.
@@ -1207,7 +1244,7 @@ module Daru
1207
1244
  deprecate :only_valid, :reject_values, 2016, 10
1208
1245
 
1209
1246
  # Return a vector with specified values removed
1210
- # @param [Array] *values values to reject from resultant vector
1247
+ # @param values [Array] values to reject from resultant vector
1211
1248
  # @return [Daru::Vector] vector with specified values removed
1212
1249
  # @example
1213
1250
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN]
@@ -1229,7 +1266,7 @@ module Daru
1229
1266
  end
1230
1267
 
1231
1268
  # Return indexes of values specified
1232
- # @param [Array] *values values to find indexes for
1269
+ # @param values [Array] values to find indexes for
1233
1270
  # @return [Array] array of indexes of values specified
1234
1271
  # @example
1235
1272
  # dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
@@ -1437,6 +1474,11 @@ module Daru
1437
1474
  end
1438
1475
  end
1439
1476
 
1477
# Helper method returning validity of arbitrary value
#
# @param v [Object] value to inspect
# @return [Boolean] false when +v+ is nil or responds to #nan? and reports
#   being NaN; true for everything else
def valid_value?(v)
  return false if v.nil?

  !(v.respond_to?(:nan?) && v.nan?)
end
1481
+
1440
1482
  def initialize_vector source, opts
1441
1483
  index, source = parse_source(source, opts)
1442
1484
  set_name opts[:name]
@@ -1502,7 +1544,7 @@ module Daru
1502
1544
  end
1503
1545
 
1504
1546
  # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
1505
- # @dtype variable is set and the underlying data type of vector changed.
1547
+ # `@dtype` variable is set and the underlying data type of vector changed.
1506
1548
  def cast_vector_to dtype, source=nil, nm_dtype=nil
1507
1549
  source = @data.to_a if source.nil?
1508
1550
 
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.1.6'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -0,0 +1,9 @@
1
+ require_relative '_base'
2
+
3
# Profiles building a Daru::Vector over the range 1..n with a string index
# of the same length.
# NOTE(review): __profile__ is presumably supplied by the '_base' file
# required above - confirm.
n = 400_000
idx = (1..n).map(&:to_s)

__profile__ do
  Daru::Vector.new(1..n, index: idx)
end
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
1596
1596
  Daru::DataFrame.new({
1597
1597
  a: [1, 2, 3, 4, 5],
1598
1598
  b: ['first', 'second', 'first', 'second', 'third'],
1599
- c: ['a', 'b', 'a', 'b', 'c']
1599
+ c: ['a', 'b', 'a', 'b', nil]
1600
1600
  })
1601
1601
  end
1602
1602
  before { df.to_category :b, :c }
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
1605
1605
  it { is_expected.to be_a Daru::DataFrame }
1606
1606
  its(:'b.type') { is_expected.to eq :category }
1607
1607
  its(:'c.type') { is_expected.to eq :category }
1608
+ its(:'a.count') { is_expected.to eq 5 }
1609
+ its(:'c.count') { is_expected.to eq 5 }
1610
+ it { expect(df.c.count('a')).to eq 2 }
1611
+ it { expect(df.c.count(nil)).to eq 1 }
1608
1612
  end
1609
1613
 
1610
1614
  context "#interact_code" do
@@ -465,4 +465,132 @@ describe Daru::Core::GroupBy do
465
465
 
466
466
  it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
467
467
  end
468
+
469
+ context 'when dataframe tuples contain nils in mismatching positions' do
470
+
471
+ let(:df){
472
+ Daru::DataFrame.new(
473
+ {
474
+ 'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
475
+ 'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
476
+ 'num' => [1, nil, 3, 4, 5, 6, 7, nil]
477
+ }
478
+ )
479
+ }
480
+
481
it 'groups by without errors' do
  # A negated raise_error with a specific class also passes when any other
  # error is raised, so assert that nothing is raised at all.
  expect { df.group_by(df.vectors.map(&:to_s)) }.not_to raise_error
end
484
+ end
485
+
486
+ context '#aggregate' do
487
+ let(:dataframe) { Daru::DataFrame.new({
488
+ employee: %w[John Jane Mark John Jane Mark],
489
+ month: %w[June June June July July July],
490
+ salary: [1000, 500, 700, 1200, 600, 600]})
491
+ }
492
+ context 'group and aggregate sum for particular single vector' do
493
+ subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }
494
+
495
+ it { is_expected.to eq Daru::DataFrame.new({
496
+ salary: [1100, 2200, 1300]},
497
+ index: ['Jane', 'John', 'Mark'])
498
+ }
499
+ end
500
+
501
+ context 'group and aggregate sum for two vectors' do
502
+ subject {
503
+ dataframe.group_by([:employee, :month]).aggregate(salary: :sum) }
504
+
505
+ it { is_expected.to eq Daru::DataFrame.new({
506
+ salary: [600, 500, 1200, 1000, 600, 700]},
507
+ index: Daru::MultiIndex.from_tuples([
508
+ ['Jane', 'July'],
509
+ ['Jane', 'June'],
510
+ ['John', 'July'],
511
+ ['John', 'June'],
512
+ ['Mark', 'July'],
513
+ ['Mark', 'June']
514
+ ])
515
+ )}
516
+ end
517
+
518
+ context 'group and aggregate sum and lambda function for vectors' do
519
+ subject { dataframe.group_by([:employee]).aggregate(
520
+ salary: :sum,
521
+ month: ->(vec) { vec.to_a.join('/') }) }
522
+
523
+ it { is_expected.to eq Daru::DataFrame.new({
524
+ salary: [1100, 2200, 1300],
525
+ month: ['June/July', 'June/July', 'June/July']},
526
+ index: ['Jane', 'John', 'Mark'],
527
+ order: [:salary, :month])
528
+ }
529
+ end
530
+
531
+ context 'group and aggregate sum and lambda functions on dataframe' do
532
+ subject { dataframe.group_by([:employee]).aggregate(
533
+ salary: :sum,
534
+ month: ->(vec) { vec.to_a.join('/') },
535
+ mean_salary: ->(df) { df.salary.mean },
536
+ periods: ->(df) { df.size }
537
+ )}
538
+
539
+ it { is_expected.to eq Daru::DataFrame.new({
540
+ salary: [1100, 2200, 1300],
541
+ month: ['June/July', 'June/July', 'June/July'],
542
+ mean_salary: [550.0, 1100.0, 650.0],
543
+ periods: [2, 2, 2]},
544
+ index: ['Jane', 'John', 'Mark'], order: [:salary, :month,
545
+ :mean_salary, :periods]) }
546
+ end
547
+
548
+ context 'group_by and aggregate on mixed MultiIndex' do
549
+ let(:df) { Daru::DataFrame.new(
550
+ name: ['Ram','Krishna','Ram','Krishna','Krishna'],
551
+ visited: [
552
+ 'Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
553
+ )
554
+ }
555
+ let(:df_mixed) { Daru::DataFrame.new(
556
+ name: ['Krishna','Ram','Krishna','Krishna'],
557
+ visited: [
558
+ 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
559
+ )
560
+ }
561
+ it 'group_by' do
562
+ expect(df.group_by(:name).df).to eq(
563
+ Daru::DataFrame.new({
564
+ visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
565
+ index: Daru::MultiIndex.from_tuples(
566
+ [['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
567
+ ['Ram', 0], ['Ram', 2]]
568
+ )
569
+ )
570
+ )
571
+ end
572
+
573
+ it 'group_by and aggregate' do
574
+ expect(
575
+ df.group_by(:name).aggregate(
576
+ visited: -> (vec){vec.to_a.join(',')})).to eq(
577
+ Daru::DataFrame.new({
578
+ visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
579
+ index: ['Krishna', 'Ram']
580
+ )
581
+ )
582
+ end
583
+
584
+ it 'group_by and aggregate when anyone index is not multiple times' do
585
+ expect(
586
+ df_mixed.group_by(:name).aggregate(
587
+ visited: -> (vec){vec.to_a.join(',')})).to eq(
588
+ Daru::DataFrame.new({
589
+ visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
590
+ index: ['Krishna', 'Ram']
591
+ )
592
+ )
593
+ end
594
+ end
595
+ end
468
596
  end
@@ -145,8 +145,15 @@ describe Daru::DataFrame do
145
145
  end
146
146
 
147
147
  context "#initialize" do
148
+
149
+ it "initializes an empty DataFrame with no arguments" do
150
+ df = Daru::DataFrame.new
151
+ expect(df.nrows).to eq(0)
152
+ expect(df.ncols).to eq(0)
153
+ end
154
+
148
155
  context Daru::Index do
149
- it "initializes an empty DataFrame" do
156
+ it "initializes an empty DataFrame with empty source arg" do
150
157
  df = Daru::DataFrame.new({}, order: [:a, :b])
151
158
 
152
159
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
@@ -164,6 +171,13 @@ describe Daru::DataFrame do
164
171
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
165
172
  end
166
173
 
174
+ it "initializes from a Hash and preserves default order" do
175
+ df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
176
+ index: [:one, :two, :three, :four, :five])
177
+
178
+ expect(df.vectors).to eq(Daru::Index.new [:b, :a])
179
+ end
180
+
167
181
  it "initializes from a Hash of Vectors" do
168
182
  va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
169
183
  vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
@@ -221,7 +235,7 @@ describe Daru::DataFrame do
221
235
  df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
222
236
 
223
237
  expect(df.index) .to eq(Daru::Index.new [0,1,2,3,4])
224
- expect(df.vectors).to eq(Daru::Index.new [:a, :b])
238
+ expect(df.vectors).to eq(Daru::Index.new [:b, :a])
225
239
  end
226
240
 
227
241
  it "aligns indexes properly" do
@@ -1573,6 +1587,22 @@ describe Daru::DataFrame do
1573
1587
  }
1574
1588
  end
1575
1589
 
1590
+ context 'with mulitiindex DF' do
1591
+ subject(:data_frame) {
1592
+ Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
1593
+ c: [11,22,33]}, order: [:a, :b, :c],
1594
+ index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
1595
+ }
1596
+
1597
+ before { data_frame.add_row [100,200,300], [:two, :five] }
1598
+
1599
+ it { is_expected.to eq(Daru::DataFrame.new({
1600
+ b: [11,12,13,200], a: [1,2,3,100],
1601
+ c: [11,22,33,300]}, order: [:a, :b, :c],
1602
+ index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
1603
+ }
1604
+ end
1605
+
1576
1606
  it "allows adding rows after making empty DF by specfying only order" do
1577
1607
  df = Daru::DataFrame.new({}, order: [:a, :b, :c])
1578
1608
  df.add_row [1,2,3]
@@ -1790,6 +1820,59 @@ describe Daru::DataFrame do
1790
1820
  end
1791
1821
  end
1792
1822
 
1823
+ describe 'uniq' do
1824
+ let(:df) do
1825
+ Daru::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
1826
+ end
1827
+
1828
+ context 'with no args' do
1829
+ it do
1830
+ result = df.uniq
1831
+ expect(result.shape.first).to eq 30
1832
+ end
1833
+ end
1834
+
1835
+ context 'given a vector' do
1836
+ it do
1837
+ result = df.uniq("color")
1838
+ expect(result.shape.first).to eq 2
1839
+ end
1840
+ end
1841
+
1842
+ context 'given an array of vectors' do
1843
+ it do
1844
+ result = df.uniq("color", "director_name")
1845
+ expect(result.shape.first).to eq 29
1846
+ end
1847
+ end
1848
+ end
1849
+
1850
+ context '#rolling_fillna!' do
1851
+ subject do
1852
+ Daru::DataFrame.new({
1853
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1854
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
1855
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1856
+ })
1857
+ end
1858
+
1859
+ context 'rolling_fillna! forwards' do
1860
+ before { subject.rolling_fillna!(:forward) }
1861
+ it { is_expected.to be_a Daru::DataFrame }
1862
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
1863
+ its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
1864
+ its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
1865
+ end
1866
+
1867
+ context 'rolling_fillna! backwards' do
1868
+ before { subject.rolling_fillna!(:backward) }
1869
+ it { is_expected.to be_a Daru::DataFrame }
1870
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
1871
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
1872
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
1873
+ end
1874
+ end
1875
+
1793
1876
  context "#clone" do
1794
1877
  it "returns a view of the whole dataframe" do
1795
1878
  cloned = @data_frame.clone
@@ -3160,6 +3243,15 @@ describe Daru::DataFrame do
3160
3243
  it { expect(subject['a'].to_a).to eq [1, 2, 3] }
3161
3244
  it { expect(subject[:b].to_a).to eq [4, 5, 6] }
3162
3245
  end
3246
+
3247
+ context "preserves indices for dataframes with same index" do
3248
+ let(:index) { ['one','two','three'] }
3249
+ let(:df1) { Daru::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
3250
+ let(:df2) { Daru::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
3251
+ subject { df1.merge df2 }
3252
+
3253
+ its(:index) { is_expected.to eq Daru::Index.new(index) }
3254
+ end
3163
3255
  end
3164
3256
 
3165
3257
  context "#vector_by_calculation" do
@@ -3176,23 +3268,27 @@ describe Daru::DataFrame do
3176
3268
 
3177
3269
  context "#vector_sum" do
3178
3270
  before do
3179
- a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
3180
- a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
3181
- b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
3182
- b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
3271
+ a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil, nil]
3272
+ a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30, nil]
3273
+ b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2, nil]
3274
+ b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3, nil]
3183
3275
  @df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
3184
3276
  end
3185
3277
 
3186
3278
  it "calculates complete vector sum" do
3187
- expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
3279
+ expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil, nil])
3280
+ end
3281
+
3282
+ it "ignores nils if skipnil is true" do
3283
+ expect(@df.vector_sum skipnil: true).to eq(Daru::Vector.new [13, 15, 26, 25, 28, 35, 0])
3188
3284
  end
3189
3285
 
3190
3286
  it "calculates partial vector sum" do
3191
3287
  a = @df.vector_sum([:a1, :a2])
3192
3288
  b = @df.vector_sum([:b1, :b2])
3193
3289
 
3194
- expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
3195
- expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
3290
+ expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil, nil])
3291
+ expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5, nil])
3196
3292
  end
3197
3293
  end
3198
3294
 
@@ -3380,7 +3476,8 @@ describe Daru::DataFrame do
3380
3476
  ev_b = Daru::Vector.new [1, 1, 0]
3381
3477
  ev_c = Daru::Vector.new [0, 1, 1]
3382
3478
  df2 = Daru::DataFrame.new({
3383
- :_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c })
3479
+ :_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
3480
+ order: ['a', 'b', 'c', :_id])
3384
3481
 
3385
3482
  expect(df2).to eq(df)
3386
3483
  end
@@ -3928,6 +4025,24 @@ describe Daru::DataFrame do
3928
4025
  end
3929
4026
  end
3930
4027
 
4028
+ context '#aggregate' do
4029
+ let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4030
# Integers are written without leading zeros: a leading 0 makes Ruby read
# the literal as octal.
let(:df) { Daru::DataFrame.new(num: [52, 12, 7, 17, 1], cat_index: cat_idx) }
let(:df_cat_idx) {
  Daru::DataFrame.new({num: [52, 12, 7, 17, 1]}, index: cat_idx) }
4033
+
4034
+ it 'lambda function on particular column' do
4035
+ expect(df.aggregate(num_100_times: ->(df) { df.num*100 })).to eq(
4036
+ Daru::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
4037
+ )
4038
+ end
4039
+ it 'aggregate sum on particular column' do
4040
+ expect(df_cat_idx.aggregate(num: :sum)).to eq(
4041
+ Daru::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
4042
+ )
4043
+ end
4044
+ end
4045
+
3931
4046
  context '#create_sql' do
3932
4047
  let(:df) { Daru::DataFrame.new({
3933
4048
  a: [1,2,3],