daru 0.1.6 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.rubocop.yml +1 -0
- data/.travis.yml +5 -0
- data/History.md +28 -0
- data/README.md +6 -0
- data/ReleasePolicy.md +20 -0
- data/daru.gemspec +4 -0
- data/lib/daru.rb +1 -2
- data/lib/daru/category.rb +15 -10
- data/lib/daru/core/group_by.rb +51 -8
- data/lib/daru/dataframe.rb +267 -28
- data/lib/daru/date_time/index.rb +1 -1
- data/lib/daru/date_time/offsets.rb +1 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +5 -5
- data/lib/daru/index/multi_index.rb +11 -2
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +19 -19
- data/lib/daru/maths/statistics/vector.rb +225 -78
- data/lib/daru/plotting/nyaplot/dataframe.rb +11 -0
- data/lib/daru/vector.rb +55 -13
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/category_spec.rb +5 -1
- data/spec/core/group_by_spec.rb +128 -0
- data/spec/dataframe_spec.rb +125 -10
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/io/io_spec.rb +2 -2
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +54 -38
- data/spec/plotting/nyaplot/dataframe_spec.rb +23 -0
- data/spec/spec_helper.rb +1 -1
- data/spec/vector_spec.rb +39 -0
- metadata +25 -3
@@ -21,6 +21,8 @@ module Daru
|
|
21
21
|
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
22
22
|
# df.plot type: :bar, x: :a, y: :b
|
23
23
|
def plot opts={}, &block
|
24
|
+
index_as_default_x_axis(opts) unless x_axis_defined?(opts)
|
25
|
+
|
24
26
|
if opts[:categorized]
|
25
27
|
plot_with_category(opts, &block)
|
26
28
|
else
|
@@ -30,6 +32,15 @@ module Daru
|
|
30
32
|
|
31
33
|
private
|
32
34
|
|
35
|
+
def x_axis_defined?(opts)
|
36
|
+
opts[:x] || opts.keys.any? { |k| k.to_s.match(/x\d+/) }
|
37
|
+
end
|
38
|
+
|
39
|
+
def index_as_default_x_axis(opts)
|
40
|
+
opts[:x] = :_index
|
41
|
+
self[:_index] = @index.to_a
|
42
|
+
end
|
43
|
+
|
33
44
|
def plot_without_category opts
|
34
45
|
options = {type: :scatter}.merge(opts)
|
35
46
|
|
data/lib/daru/vector.rb
CHANGED
@@ -228,7 +228,7 @@ module Daru
|
|
228
228
|
end
|
229
229
|
|
230
230
|
# Returns vector of values given positional values
|
231
|
-
# @param [Array<object>]
|
231
|
+
# @param positions [Array<object>] positional values
|
232
232
|
# @return [object] vector
|
233
233
|
# @example
|
234
234
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -252,7 +252,7 @@ module Daru
|
|
252
252
|
end
|
253
253
|
|
254
254
|
# Change value at given positions
|
255
|
-
# @param [Array<object>]
|
255
|
+
# @param positions [Array<object>] positional values
|
256
256
|
# @param [object] val value to assign
|
257
257
|
# @example
|
258
258
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -385,11 +385,11 @@ module Daru
|
|
385
385
|
# comparator methods to obtain meaningful results. See this notebook for
|
386
386
|
# a good overview of using #where.
|
387
387
|
#
|
388
|
-
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>]
|
388
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
389
389
|
# collection containing the true or false values. Each element in the Vector
|
390
390
|
# corresponding to a `true` in the bool_array will be returned along with its
|
391
391
|
# index.
|
392
|
-
# @
|
392
|
+
# @example Usage of #where.
|
393
393
|
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
394
394
|
#
|
395
395
|
# # Simple logic statement passed to #where.
|
@@ -451,7 +451,7 @@ module Daru
|
|
451
451
|
deprecate :flawed?, :include_values?, 2016, 10
|
452
452
|
|
453
453
|
# Check if any one of mentioned values occur in the vector
|
454
|
-
# @param [Array]
|
454
|
+
# @param values [Array] values to check for
|
455
455
|
# @return [true, false] returns true if any one of specified values
|
456
456
|
# occur in the vector
|
457
457
|
# @example
|
@@ -467,7 +467,7 @@ module Daru
|
|
467
467
|
# Return vector of booleans with value at ith position is either
|
468
468
|
# true or false depending upon whether value at position i is equal to
|
469
469
|
# any of the values passed in the argument or not
|
470
|
-
# @param [Array]
|
470
|
+
# @param values [Array] values to equate with
|
471
471
|
# @return [Daru::Vector] vector of boolean values
|
472
472
|
# @example
|
473
473
|
# dv = Daru::Vector.new [1, 2, 3, 2, 1]
|
@@ -760,6 +760,43 @@ module Daru
|
|
760
760
|
self
|
761
761
|
end
|
762
762
|
|
763
|
+
# Rolling fillna
|
764
|
+
# replace all Float::NAN and NIL values with the preceding or following value
|
765
|
+
#
|
766
|
+
# @param direction [Symbol] (:forward, :backward) whether replacement value is preceding or following
|
767
|
+
#
|
768
|
+
# @example
|
769
|
+
# dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
|
770
|
+
#
|
771
|
+
# dv.rolling_fillna(:forward)
|
772
|
+
# => #<Daru::Vector(9)>
|
773
|
+
# 0 1
|
774
|
+
# 1 2
|
775
|
+
# 2 1
|
776
|
+
# 3 4
|
777
|
+
# 4 4
|
778
|
+
# 5 4
|
779
|
+
# 6 3
|
780
|
+
# 7 3
|
781
|
+
# 8 3
|
782
|
+
#
|
783
|
+
def rolling_fillna!(direction=:forward)
|
784
|
+
enum = direction == :forward ? index : index.reverse_each
|
785
|
+
last_valid_value = 0
|
786
|
+
enum.each do |idx|
|
787
|
+
if valid_value?(self[idx])
|
788
|
+
last_valid_value = self[idx]
|
789
|
+
else
|
790
|
+
self[idx] = last_valid_value
|
791
|
+
end
|
792
|
+
end
|
793
|
+
end
|
794
|
+
|
795
|
+
# Non-destructive version of rolling_fillna!
|
796
|
+
def rolling_fillna(direction=:forward)
|
797
|
+
dup.rolling_fillna!(direction)
|
798
|
+
end
|
799
|
+
|
763
800
|
# Lags the series by `k` periods.
|
764
801
|
#
|
765
802
|
# Lags the series by `k` periods, "shifting" data and inserting `nil`s
|
@@ -818,7 +855,7 @@ module Daru
|
|
818
855
|
deprecate :n_valid, :count_values, 2016, 10
|
819
856
|
|
820
857
|
# Count the number of values specified
|
821
|
-
# @param [Array]
|
858
|
+
# @param values [Array] values to count for
|
822
859
|
# @return [Integer] the number of times the values mentioned occurs
|
823
860
|
# @example
|
824
861
|
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
@@ -942,7 +979,7 @@ module Daru
|
|
942
979
|
end
|
943
980
|
|
944
981
|
# Create a summary of the Vector
|
945
|
-
# @
|
982
|
+
# @param indent_level [Fixnum] indent level
|
946
983
|
# @return [String] String containing the summary of the Vector
|
947
984
|
# @example
|
948
985
|
# dv = Daru::Vector.new [1, 2, 3]
|
@@ -1182,10 +1219,10 @@ module Daru
|
|
1182
1219
|
#
|
1183
1220
|
# == Arguments
|
1184
1221
|
#
|
1185
|
-
# @as_a [Symbol] Passing :array will return only the elements
|
1222
|
+
# @param as_a [Symbol] Passing :array will return only the elements
|
1186
1223
|
# as an Array. Otherwise will return a Daru::Vector.
|
1187
1224
|
#
|
1188
|
-
# @
|
1225
|
+
# @param _duplicate [Symbol] In case no missing data is found in the
|
1189
1226
|
# vector, setting this to false will return the same vector.
|
1190
1227
|
# Otherwise, a duplicate will be returned irrespective of
|
1191
1228
|
# presence of missing data.
|
@@ -1207,7 +1244,7 @@ module Daru
|
|
1207
1244
|
deprecate :only_valid, :reject_values, 2016, 10
|
1208
1245
|
|
1209
1246
|
# Return a vector with specified values removed
|
1210
|
-
# @param [Array]
|
1247
|
+
# @param values [Array] values to reject from resultant vector
|
1211
1248
|
# @return [Daru::Vector] vector with specified values removed
|
1212
1249
|
# @example
|
1213
1250
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN]
|
@@ -1229,7 +1266,7 @@ module Daru
|
|
1229
1266
|
end
|
1230
1267
|
|
1231
1268
|
# Return indexes of values specified
|
1232
|
-
# @param [Array]
|
1269
|
+
# @param values [Array] values to find indexes for
|
1233
1270
|
# @return [Array] array of indexes of values specified
|
1234
1271
|
# @example
|
1235
1272
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
@@ -1437,6 +1474,11 @@ module Daru
|
|
1437
1474
|
end
|
1438
1475
|
end
|
1439
1476
|
|
1477
|
+
# Helper method returning validity of arbitrary value
|
1478
|
+
def valid_value?(v)
|
1479
|
+
v.respond_to?(:nan?) && v.nan? || v.nil? ? false : true
|
1480
|
+
end
|
1481
|
+
|
1440
1482
|
def initialize_vector source, opts
|
1441
1483
|
index, source = parse_source(source, opts)
|
1442
1484
|
set_name opts[:name]
|
@@ -1502,7 +1544,7 @@ module Daru
|
|
1502
1544
|
end
|
1503
1545
|
|
1504
1546
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
1505
|
-
# @dtype variable is set and the underlying data type of vector changed.
|
1547
|
+
# @dtype variable is set and the underlying data type of vector changed.
|
1506
1548
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
1507
1549
|
source = @data.to_a if source.nil?
|
1508
1550
|
|
data/lib/daru/version.rb
CHANGED
data/spec/category_spec.rb
CHANGED
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
|
|
1596
1596
|
Daru::DataFrame.new({
|
1597
1597
|
a: [1, 2, 3, 4, 5],
|
1598
1598
|
b: ['first', 'second', 'first', 'second', 'third'],
|
1599
|
-
c: ['a', 'b', 'a', 'b',
|
1599
|
+
c: ['a', 'b', 'a', 'b', nil]
|
1600
1600
|
})
|
1601
1601
|
end
|
1602
1602
|
before { df.to_category :b, :c }
|
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
|
|
1605
1605
|
it { is_expected.to be_a Daru::DataFrame }
|
1606
1606
|
its(:'b.type') { is_expected.to eq :category }
|
1607
1607
|
its(:'c.type') { is_expected.to eq :category }
|
1608
|
+
its(:'a.count') { is_expected.to eq 5 }
|
1609
|
+
its(:'c.count') { is_expected.to eq 5 }
|
1610
|
+
it { expect(df.c.count('a')).to eq 2 }
|
1611
|
+
it { expect(df.c.count(nil)).to eq 1 }
|
1608
1612
|
end
|
1609
1613
|
|
1610
1614
|
context "#interact_code" do
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -465,4 +465,132 @@ describe Daru::Core::GroupBy do
|
|
465
465
|
|
466
466
|
it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
|
467
467
|
end
|
468
|
+
|
469
|
+
context 'when dataframe tuples contain nils in mismatching positions' do
|
470
|
+
|
471
|
+
let(:df){
|
472
|
+
Daru::DataFrame.new(
|
473
|
+
{
|
474
|
+
'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
|
475
|
+
'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
|
476
|
+
'num' => [1, nil, 3, 4, 5, 6, 7, nil]
|
477
|
+
}
|
478
|
+
)
|
479
|
+
}
|
480
|
+
|
481
|
+
it 'groups by without errors' do
|
482
|
+
expect { df.group_by(df.vectors.map(&:to_s)) }.to_not raise_error(ArgumentError)
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
context '#aggregate' do
|
487
|
+
let(:dataframe) { Daru::DataFrame.new({
|
488
|
+
employee: %w[John Jane Mark John Jane Mark],
|
489
|
+
month: %w[June June June July July July],
|
490
|
+
salary: [1000, 500, 700, 1200, 600, 600]})
|
491
|
+
}
|
492
|
+
context 'group and aggregate sum for particular single vector' do
|
493
|
+
subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }
|
494
|
+
|
495
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
496
|
+
salary: [1100, 2200, 1300]},
|
497
|
+
index: ['Jane', 'John', 'Mark'])
|
498
|
+
}
|
499
|
+
end
|
500
|
+
|
501
|
+
context 'group and aggregate sum for two vectors' do
|
502
|
+
subject {
|
503
|
+
dataframe.group_by([:employee, :month]).aggregate(salary: :sum) }
|
504
|
+
|
505
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
506
|
+
salary: [600, 500, 1200, 1000, 600, 700]},
|
507
|
+
index: Daru::MultiIndex.from_tuples([
|
508
|
+
['Jane', 'July'],
|
509
|
+
['Jane', 'June'],
|
510
|
+
['John', 'July'],
|
511
|
+
['John', 'June'],
|
512
|
+
['Mark', 'July'],
|
513
|
+
['Mark', 'June']
|
514
|
+
])
|
515
|
+
)}
|
516
|
+
end
|
517
|
+
|
518
|
+
context 'group and aggregate sum and lambda function for vectors' do
|
519
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
520
|
+
salary: :sum,
|
521
|
+
month: ->(vec) { vec.to_a.join('/') }) }
|
522
|
+
|
523
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
524
|
+
salary: [1100, 2200, 1300],
|
525
|
+
month: ['June/July', 'June/July', 'June/July']},
|
526
|
+
index: ['Jane', 'John', 'Mark'],
|
527
|
+
order: [:salary, :month])
|
528
|
+
}
|
529
|
+
end
|
530
|
+
|
531
|
+
context 'group and aggregate sum and lambda functions on dataframe' do
|
532
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
533
|
+
salary: :sum,
|
534
|
+
month: ->(vec) { vec.to_a.join('/') },
|
535
|
+
mean_salary: ->(df) { df.salary.mean },
|
536
|
+
periods: ->(df) { df.size }
|
537
|
+
)}
|
538
|
+
|
539
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
540
|
+
salary: [1100, 2200, 1300],
|
541
|
+
month: ['June/July', 'June/July', 'June/July'],
|
542
|
+
mean_salary: [550.0, 1100.0, 650.0],
|
543
|
+
periods: [2, 2, 2]},
|
544
|
+
index: ['Jane', 'John', 'Mark'], order: [:salary, :month,
|
545
|
+
:mean_salary, :periods]) }
|
546
|
+
end
|
547
|
+
|
548
|
+
context 'group_by and aggregate on mixed MultiIndex' do
|
549
|
+
let(:df) { Daru::DataFrame.new(
|
550
|
+
name: ['Ram','Krishna','Ram','Krishna','Krishna'],
|
551
|
+
visited: [
|
552
|
+
'Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
553
|
+
)
|
554
|
+
}
|
555
|
+
let(:df_mixed) { Daru::DataFrame.new(
|
556
|
+
name: ['Krishna','Ram','Krishna','Krishna'],
|
557
|
+
visited: [
|
558
|
+
'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
559
|
+
)
|
560
|
+
}
|
561
|
+
it 'group_by' do
|
562
|
+
expect(df.group_by(:name).df).to eq(
|
563
|
+
Daru::DataFrame.new({
|
564
|
+
visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
|
565
|
+
index: Daru::MultiIndex.from_tuples(
|
566
|
+
[['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
|
567
|
+
['Ram', 0], ['Ram', 2]]
|
568
|
+
)
|
569
|
+
)
|
570
|
+
)
|
571
|
+
end
|
572
|
+
|
573
|
+
it 'group_by and aggregate' do
|
574
|
+
expect(
|
575
|
+
df.group_by(:name).aggregate(
|
576
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
577
|
+
Daru::DataFrame.new({
|
578
|
+
visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
|
579
|
+
index: ['Krishna', 'Ram']
|
580
|
+
)
|
581
|
+
)
|
582
|
+
end
|
583
|
+
|
584
|
+
it 'group_by and aggregate when anyone index is not multiple times' do
|
585
|
+
expect(
|
586
|
+
df_mixed.group_by(:name).aggregate(
|
587
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
588
|
+
Daru::DataFrame.new({
|
589
|
+
visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
|
590
|
+
index: ['Krishna', 'Ram']
|
591
|
+
)
|
592
|
+
)
|
593
|
+
end
|
594
|
+
end
|
595
|
+
end
|
468
596
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -145,8 +145,15 @@ describe Daru::DataFrame do
|
|
145
145
|
end
|
146
146
|
|
147
147
|
context "#initialize" do
|
148
|
+
|
149
|
+
it "initializes an empty DataFrame with no arguments" do
|
150
|
+
df = Daru::DataFrame.new
|
151
|
+
expect(df.nrows).to eq(0)
|
152
|
+
expect(df.ncols).to eq(0)
|
153
|
+
end
|
154
|
+
|
148
155
|
context Daru::Index do
|
149
|
-
it "initializes an empty DataFrame" do
|
156
|
+
it "initializes an empty DataFrame with empty source arg" do
|
150
157
|
df = Daru::DataFrame.new({}, order: [:a, :b])
|
151
158
|
|
152
159
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
@@ -164,6 +171,13 @@ describe Daru::DataFrame do
|
|
164
171
|
expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
|
165
172
|
end
|
166
173
|
|
174
|
+
it "initializes from a Hash and preserves default order" do
|
175
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
176
|
+
index: [:one, :two, :three, :four, :five])
|
177
|
+
|
178
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
179
|
+
end
|
180
|
+
|
167
181
|
it "initializes from a Hash of Vectors" do
|
168
182
|
va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
|
169
183
|
vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
|
@@ -221,7 +235,7 @@ describe Daru::DataFrame do
|
|
221
235
|
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
|
222
236
|
|
223
237
|
expect(df.index) .to eq(Daru::Index.new [0,1,2,3,4])
|
224
|
-
expect(df.vectors).to eq(Daru::Index.new [:
|
238
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
225
239
|
end
|
226
240
|
|
227
241
|
it "aligns indexes properly" do
|
@@ -1573,6 +1587,22 @@ describe Daru::DataFrame do
|
|
1573
1587
|
}
|
1574
1588
|
end
|
1575
1589
|
|
1590
|
+
context 'with mulitiindex DF' do
|
1591
|
+
subject(:data_frame) {
|
1592
|
+
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
1593
|
+
c: [11,22,33]}, order: [:a, :b, :c],
|
1594
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
|
1595
|
+
}
|
1596
|
+
|
1597
|
+
before { data_frame.add_row [100,200,300], [:two, :five] }
|
1598
|
+
|
1599
|
+
it { is_expected.to eq(Daru::DataFrame.new({
|
1600
|
+
b: [11,12,13,200], a: [1,2,3,100],
|
1601
|
+
c: [11,22,33,300]}, order: [:a, :b, :c],
|
1602
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
|
1603
|
+
}
|
1604
|
+
end
|
1605
|
+
|
1576
1606
|
it "allows adding rows after making empty DF by specfying only order" do
|
1577
1607
|
df = Daru::DataFrame.new({}, order: [:a, :b, :c])
|
1578
1608
|
df.add_row [1,2,3]
|
@@ -1790,6 +1820,59 @@ describe Daru::DataFrame do
|
|
1790
1820
|
end
|
1791
1821
|
end
|
1792
1822
|
|
1823
|
+
describe 'uniq' do
|
1824
|
+
let(:df) do
|
1825
|
+
Daru::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
|
1826
|
+
end
|
1827
|
+
|
1828
|
+
context 'with no args' do
|
1829
|
+
it do
|
1830
|
+
result = df.uniq
|
1831
|
+
expect(result.shape.first).to eq 30
|
1832
|
+
end
|
1833
|
+
end
|
1834
|
+
|
1835
|
+
context 'given a vector' do
|
1836
|
+
it do
|
1837
|
+
result = df.uniq("color")
|
1838
|
+
expect(result.shape.first).to eq 2
|
1839
|
+
end
|
1840
|
+
end
|
1841
|
+
|
1842
|
+
context 'given an array of vectors' do
|
1843
|
+
it do
|
1844
|
+
result = df.uniq("color", "director_name")
|
1845
|
+
expect(result.shape.first).to eq 29
|
1846
|
+
end
|
1847
|
+
end
|
1848
|
+
end
|
1849
|
+
|
1850
|
+
context '#rolling_fillna!' do
|
1851
|
+
subject do
|
1852
|
+
Daru::DataFrame.new({
|
1853
|
+
a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
|
1854
|
+
b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
|
1855
|
+
c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
1856
|
+
})
|
1857
|
+
end
|
1858
|
+
|
1859
|
+
context 'rolling_fillna! forwards' do
|
1860
|
+
before { subject.rolling_fillna!(:forward) }
|
1861
|
+
it { is_expected.to be_a Daru::DataFrame }
|
1862
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
|
1863
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
|
1864
|
+
its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
|
1865
|
+
end
|
1866
|
+
|
1867
|
+
context 'rolling_fillna! backwards' do
|
1868
|
+
before { subject.rolling_fillna!(:backward) }
|
1869
|
+
it { is_expected.to be_a Daru::DataFrame }
|
1870
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
|
1871
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
|
1872
|
+
its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
|
1873
|
+
end
|
1874
|
+
end
|
1875
|
+
|
1793
1876
|
context "#clone" do
|
1794
1877
|
it "returns a view of the whole dataframe" do
|
1795
1878
|
cloned = @data_frame.clone
|
@@ -3160,6 +3243,15 @@ describe Daru::DataFrame do
|
|
3160
3243
|
it { expect(subject['a'].to_a).to eq [1, 2, 3] }
|
3161
3244
|
it { expect(subject[:b].to_a).to eq [4, 5, 6] }
|
3162
3245
|
end
|
3246
|
+
|
3247
|
+
context "preserves indices for dataframes with same index" do
|
3248
|
+
let(:index) { ['one','two','three'] }
|
3249
|
+
let(:df1) { Daru::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
|
3250
|
+
let(:df2) { Daru::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
|
3251
|
+
subject { df1.merge df2 }
|
3252
|
+
|
3253
|
+
its(:index) { is_expected.to eq Daru::Index.new(index) }
|
3254
|
+
end
|
3163
3255
|
end
|
3164
3256
|
|
3165
3257
|
context "#vector_by_calculation" do
|
@@ -3176,23 +3268,27 @@ describe Daru::DataFrame do
|
|
3176
3268
|
|
3177
3269
|
context "#vector_sum" do
|
3178
3270
|
before do
|
3179
|
-
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
|
3180
|
-
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
|
3181
|
-
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
|
3182
|
-
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
3271
|
+
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil, nil]
|
3272
|
+
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30, nil]
|
3273
|
+
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2, nil]
|
3274
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3, nil]
|
3183
3275
|
@df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
|
3184
3276
|
end
|
3185
3277
|
|
3186
3278
|
it "calculates complete vector sum" do
|
3187
|
-
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
|
3279
|
+
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil, nil])
|
3280
|
+
end
|
3281
|
+
|
3282
|
+
it "ignores nils if skipnil is true" do
|
3283
|
+
expect(@df.vector_sum skipnil: true).to eq(Daru::Vector.new [13, 15, 26, 25, 28, 35, 0])
|
3188
3284
|
end
|
3189
3285
|
|
3190
3286
|
it "calculates partial vector sum" do
|
3191
3287
|
a = @df.vector_sum([:a1, :a2])
|
3192
3288
|
b = @df.vector_sum([:b1, :b2])
|
3193
3289
|
|
3194
|
-
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
|
3195
|
-
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
|
3290
|
+
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil, nil])
|
3291
|
+
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5, nil])
|
3196
3292
|
end
|
3197
3293
|
end
|
3198
3294
|
|
@@ -3380,7 +3476,8 @@ describe Daru::DataFrame do
|
|
3380
3476
|
ev_b = Daru::Vector.new [1, 1, 0]
|
3381
3477
|
ev_c = Daru::Vector.new [0, 1, 1]
|
3382
3478
|
df2 = Daru::DataFrame.new({
|
3383
|
-
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }
|
3479
|
+
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
|
3480
|
+
order: ['a', 'b', 'c', :_id])
|
3384
3481
|
|
3385
3482
|
expect(df2).to eq(df)
|
3386
3483
|
end
|
@@ -3928,6 +4025,24 @@ describe Daru::DataFrame do
|
|
3928
4025
|
end
|
3929
4026
|
end
|
3930
4027
|
|
4028
|
+
context '#aggregate' do
|
4029
|
+
let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
|
4030
|
+
let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
|
4031
|
+
let(:df_cat_idx) {
|
4032
|
+
Daru::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
|
4033
|
+
|
4034
|
+
it 'lambda function on particular column' do
|
4035
|
+
expect(df.aggregate(num_100_times: ->(df) { df.num*100 })).to eq(
|
4036
|
+
Daru::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
|
4037
|
+
)
|
4038
|
+
end
|
4039
|
+
it 'aggregate sum on particular column' do
|
4040
|
+
expect(df_cat_idx.aggregate(num: :sum)).to eq(
|
4041
|
+
Daru::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
|
4042
|
+
)
|
4043
|
+
end
|
4044
|
+
end
|
4045
|
+
|
3931
4046
|
context '#create_sql' do
|
3932
4047
|
let(:df) { Daru::DataFrame.new({
|
3933
4048
|
a: [1,2,3],
|