daru 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.rubocop.yml +1 -0
- data/.travis.yml +5 -0
- data/History.md +28 -0
- data/README.md +6 -0
- data/ReleasePolicy.md +20 -0
- data/daru.gemspec +4 -0
- data/lib/daru.rb +1 -2
- data/lib/daru/category.rb +15 -10
- data/lib/daru/core/group_by.rb +51 -8
- data/lib/daru/dataframe.rb +267 -28
- data/lib/daru/date_time/index.rb +1 -1
- data/lib/daru/date_time/offsets.rb +1 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +5 -5
- data/lib/daru/index/multi_index.rb +11 -2
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +19 -19
- data/lib/daru/maths/statistics/vector.rb +225 -78
- data/lib/daru/plotting/nyaplot/dataframe.rb +11 -0
- data/lib/daru/vector.rb +55 -13
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/category_spec.rb +5 -1
- data/spec/core/group_by_spec.rb +128 -0
- data/spec/dataframe_spec.rb +125 -10
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/io/io_spec.rb +2 -2
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +54 -38
- data/spec/plotting/nyaplot/dataframe_spec.rb +23 -0
- data/spec/spec_helper.rb +1 -1
- data/spec/vector_spec.rb +39 -0
- metadata +25 -3
@@ -21,6 +21,8 @@ module Daru
|
|
21
21
|
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
22
22
|
# df.plot type: :bar, x: :a, y: :b
|
23
23
|
def plot opts={}, &block
|
24
|
+
index_as_default_x_axis(opts) unless x_axis_defined?(opts)
|
25
|
+
|
24
26
|
if opts[:categorized]
|
25
27
|
plot_with_category(opts, &block)
|
26
28
|
else
|
@@ -30,6 +32,15 @@ module Daru
|
|
30
32
|
|
31
33
|
private
|
32
34
|
|
35
|
+
# Tells whether the plot options already name an x axis.
#
# An x axis counts as defined when opts[:x] is truthy, or when any key is
# exactly "x" followed by digits (:x1, :x2, ... or the String equivalents).
# The pattern is anchored so that keys which merely contain such a sequence
# (for example :max1) are not mistaken for an x axis.
#
# @param opts [Hash] plotting options as passed to #plot
# @return [Boolean] true when an x axis is present, false otherwise
def x_axis_defined?(opts)
  return true if opts[:x]

  opts.keys.any? { |key| key.to_s =~ /\Ax\d+\z/ }
end
|
38
|
+
|
39
|
+
# Uses the receiver's index as the x axis of the plot.
#
# Points opts[:x] at a vector named :_index and stores the index values
# under that name on the receiver. Both the given opts and the receiver are
# modified in place.
#
# @param opts [Hash] plotting options; gains the entry x: :_index
# @return [Array] the index values that were stored under :_index
def index_as_default_x_axis(opts)
  opts[:x] = :_index
  index_values = @index.to_a
  self[:_index] = index_values
end
|
43
|
+
|
33
44
|
def plot_without_category opts
|
34
45
|
options = {type: :scatter}.merge(opts)
|
35
46
|
|
data/lib/daru/vector.rb
CHANGED
@@ -228,7 +228,7 @@ module Daru
|
|
228
228
|
end
|
229
229
|
|
230
230
|
# Returns vector of values given positional values
|
231
|
-
# @param [Array<object>]
|
231
|
+
# @param positions [Array<object>] positional values
|
232
232
|
# @return [object] vector
|
233
233
|
# @example
|
234
234
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -252,7 +252,7 @@ module Daru
|
|
252
252
|
end
|
253
253
|
|
254
254
|
# Change value at given positions
|
255
|
-
# @param [Array<object>]
|
255
|
+
# @param positions [Array<object>] positional values
|
256
256
|
# @param val [object] value to assign
|
257
257
|
# @example
|
258
258
|
# dv = Daru::Vector.new 'a'..'e'
|
@@ -385,11 +385,11 @@ module Daru
|
|
385
385
|
# comparator methods to obtain meaningful results. See this notebook for
|
386
386
|
# a good overview of using #where.
|
387
387
|
#
|
388
|
-
# @param [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>]
|
388
|
+
# @param bool_array [Daru::Core::Query::BoolArray, Array<TrueClass, FalseClass>] The
|
389
389
|
# collection containing the true or false values. Each element in the Vector
|
390
390
|
# corresponding to a `true` in the bool_array will be returned along with its
|
391
391
|
# index.
|
392
|
-
# @
|
392
|
+
# @example Usage of #where.
|
393
393
|
# vector = Daru::Vector.new([2,4,5,51,5,16,2,5,3,2,1,5,2,5,2,1,56,234,6,21])
|
394
394
|
#
|
395
395
|
# # Simple logic statement passed to #where.
|
@@ -451,7 +451,7 @@ module Daru
|
|
451
451
|
deprecate :flawed?, :include_values?, 2016, 10
|
452
452
|
|
453
453
|
# Check if any one of mentioned values occur in the vector
|
454
|
-
# @param [Array]
|
454
|
+
# @param values [Array] values to check for
|
455
455
|
# @return [true, false] returns true if any one of specified values
|
456
456
|
# occur in the vector
|
457
457
|
# @example
|
@@ -467,7 +467,7 @@ module Daru
|
|
467
467
|
# Return vector of booleans with value at ith position is either
|
468
468
|
# true or false depending upon whether value at position i is equal to
|
469
469
|
# any of the values passed in the argument or not
|
470
|
-
# @param [Array]
|
470
|
+
# @param values [Array] values to equate with
|
471
471
|
# @return [Daru::Vector] vector of boolean values
|
472
472
|
# @example
|
473
473
|
# dv = Daru::Vector.new [1, 2, 3, 2, 1]
|
@@ -760,6 +760,43 @@ module Daru
|
|
760
760
|
self
|
761
761
|
end
|
762
762
|
|
763
|
+
# Rolling fillna
# Replaces, in place, every nil and Float::NAN value with the nearest valid
# value that precedes it (:forward) or follows it (any other direction).
#
# Positions with no valid value ahead of them in walking order (for example
# a leading nil when rolling forward) are filled with 0.
#
# @param direction [Symbol] :forward to use the preceding valid value;
#   any other value (conventionally :backward) to use the following one
# @return [Daru::Vector] self, so that calls can be chained
#
# @example
#   dv = Daru::Vector.new([1, 2, 1, 4, nil, Float::NAN, 3, nil, Float::NAN])
#
#   dv.rolling_fillna!(:forward)
#   # => #<Daru::Vector(9)>
#   #      0   1
#   #      1   2
#   #      2   1
#   #      3   4
#   #      4   4
#   #      5   4
#   #      6   3
#   #      7   3
#   #      8   3
#
def rolling_fillna!(direction=:forward)
  enum = direction == :forward ? index : index.reverse_each
  # Seed used until the first valid value is met in walking order.
  last_valid_value = 0
  enum.each do |idx|
    value = self[idx]
    if valid_value?(value)
      last_valid_value = value
    else
      self[idx] = last_valid_value
    end
  end
  # The value of `enum.each` is not the vector, so return the receiver explicitly.
  self
end
|
794
|
+
|
795
|
+
# Non-destructive version of rolling_fillna!
#
# Fills a duplicate of the receiver (obtained with #dup) and returns that
# duplicate.
#
# @param direction [Symbol] passed through unchanged to rolling_fillna!
# @return [Daru::Vector] the filled duplicate
def rolling_fillna(direction=:forward)
  # Return the copy explicitly instead of whatever rolling_fillna! evaluates to.
  dup.tap { |copy| copy.rolling_fillna!(direction) }
end
|
799
|
+
|
763
800
|
# Lags the series by `k` periods.
|
764
801
|
#
|
765
802
|
# Lags the series by `k` periods, "shifting" data and inserting `nil`s
|
@@ -818,7 +855,7 @@ module Daru
|
|
818
855
|
deprecate :n_valid, :count_values, 2016, 10
|
819
856
|
|
820
857
|
# Count the number of values specified
|
821
|
-
# @param [Array]
|
858
|
+
# @param values [Array] values to count for
|
822
859
|
# @return [Integer] the number of times the values mentioned occurs
|
823
860
|
# @example
|
824
861
|
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
@@ -942,7 +979,7 @@ module Daru
|
|
942
979
|
end
|
943
980
|
|
944
981
|
# Create a summary of the Vector
|
945
|
-
# @
|
982
|
+
# @param indent_level [Fixnum] indent level
|
946
983
|
# @return [String] String containing the summary of the Vector
|
947
984
|
# @example
|
948
985
|
# dv = Daru::Vector.new [1, 2, 3]
|
@@ -1182,10 +1219,10 @@ module Daru
|
|
1182
1219
|
#
|
1183
1220
|
# == Arguments
|
1184
1221
|
#
|
1185
|
-
# @as_a [Symbol] Passing :array will return only the elements
|
1222
|
+
# @param as_a [Symbol] Passing :array will return only the elements
|
1186
1223
|
# as an Array. Otherwise will return a Daru::Vector.
|
1187
1224
|
#
|
1188
|
-
# @
|
1225
|
+
# @param _duplicate [Symbol] In case no missing data is found in the
|
1189
1226
|
# vector, setting this to false will return the same vector.
|
1190
1227
|
# Otherwise, a duplicate will be returned irrespective of
|
1191
1228
|
# presence of missing data.
|
@@ -1207,7 +1244,7 @@ module Daru
|
|
1207
1244
|
deprecate :only_valid, :reject_values, 2016, 10
|
1208
1245
|
|
1209
1246
|
# Return a vector with specified values removed
|
1210
|
-
# @param [Array]
|
1247
|
+
# @param values [Array] values to reject from resultant vector
|
1211
1248
|
# @return [Daru::Vector] vector with specified values removed
|
1212
1249
|
# @example
|
1213
1250
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN]
|
@@ -1229,7 +1266,7 @@ module Daru
|
|
1229
1266
|
end
|
1230
1267
|
|
1231
1268
|
# Return indexes of values specified
|
1232
|
-
# @param [Array]
|
1269
|
+
# @param values [Array] values to find indexes for
|
1233
1270
|
# @return [Array] array of indexes of values specified
|
1234
1271
|
# @example
|
1235
1272
|
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
@@ -1437,6 +1474,11 @@ module Daru
|
|
1437
1474
|
end
|
1438
1475
|
end
|
1439
1476
|
|
1477
|
+
# Helper method returning validity of arbitrary value
#
# A value is invalid when it is nil, or when it responds to #nan? and
# reports itself as NaN; every other value is valid.
#
# @param v [Object] value to inspect
# @return [Boolean] false for nil and NaN values, true otherwise
def valid_value?(v)
  return false if v.nil?

  !(v.respond_to?(:nan?) && v.nan?)
end
|
1481
|
+
|
1440
1482
|
def initialize_vector source, opts
|
1441
1483
|
index, source = parse_source(source, opts)
|
1442
1484
|
set_name opts[:name]
|
@@ -1502,7 +1544,7 @@ module Daru
|
|
1502
1544
|
end
|
1503
1545
|
|
1504
1546
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
1505
|
-
# @dtype variable is set and the underlying data type of vector changed.
|
1547
|
+
# instance variable @dtype is set and the underlying data type of vector changed.
|
1506
1548
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
1507
1549
|
source = @data.to_a if source.nil?
|
1508
1550
|
|
data/lib/daru/version.rb
CHANGED
data/spec/category_spec.rb
CHANGED
@@ -1596,7 +1596,7 @@ describe Daru::DataFrame, "categorical" do
|
|
1596
1596
|
Daru::DataFrame.new({
|
1597
1597
|
a: [1, 2, 3, 4, 5],
|
1598
1598
|
b: ['first', 'second', 'first', 'second', 'third'],
|
1599
|
-
c: ['a', 'b', 'a', 'b',
|
1599
|
+
c: ['a', 'b', 'a', 'b', nil]
|
1600
1600
|
})
|
1601
1601
|
end
|
1602
1602
|
before { df.to_category :b, :c }
|
@@ -1605,6 +1605,10 @@ describe Daru::DataFrame, "categorical" do
|
|
1605
1605
|
it { is_expected.to be_a Daru::DataFrame }
|
1606
1606
|
its(:'b.type') { is_expected.to eq :category }
|
1607
1607
|
its(:'c.type') { is_expected.to eq :category }
|
1608
|
+
its(:'a.count') { is_expected.to eq 5 }
|
1609
|
+
its(:'c.count') { is_expected.to eq 5 }
|
1610
|
+
it { expect(df.c.count('a')).to eq 2 }
|
1611
|
+
it { expect(df.c.count(nil)).to eq 1 }
|
1608
1612
|
end
|
1609
1613
|
|
1610
1614
|
context "#interact_code" do
|
data/spec/core/group_by_spec.rb
CHANGED
@@ -465,4 +465,132 @@ describe Daru::Core::GroupBy do
|
|
465
465
|
|
466
466
|
it { is_expected.to eq Daru::DataFrame.new({num: [6]}, index: ['a']) }
|
467
467
|
end
|
468
|
+
|
469
|
+
context 'when dataframe tuples contain nils in mismatching positions' do
|
470
|
+
|
471
|
+
let(:df){
|
472
|
+
Daru::DataFrame.new(
|
473
|
+
{
|
474
|
+
'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
|
475
|
+
'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
|
476
|
+
'num' => [1, nil, 3, 4, 5, 6, 7, nil]
|
477
|
+
}
|
478
|
+
)
|
479
|
+
}
|
480
|
+
|
481
|
+
it 'groups by without errors' do
|
482
|
+
expect { df.group_by(df.vectors.map(&:to_s)) }.to_not raise_error(ArgumentError)
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
context '#aggregate' do
|
487
|
+
let(:dataframe) { Daru::DataFrame.new({
|
488
|
+
employee: %w[John Jane Mark John Jane Mark],
|
489
|
+
month: %w[June June June July July July],
|
490
|
+
salary: [1000, 500, 700, 1200, 600, 600]})
|
491
|
+
}
|
492
|
+
context 'group and aggregate sum for particular single vector' do
|
493
|
+
subject { dataframe.group_by([:employee]).aggregate(salary: :sum) }
|
494
|
+
|
495
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
496
|
+
salary: [1100, 2200, 1300]},
|
497
|
+
index: ['Jane', 'John', 'Mark'])
|
498
|
+
}
|
499
|
+
end
|
500
|
+
|
501
|
+
context 'group and aggregate sum for two vectors' do
|
502
|
+
subject {
|
503
|
+
dataframe.group_by([:employee, :month]).aggregate(salary: :sum) }
|
504
|
+
|
505
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
506
|
+
salary: [600, 500, 1200, 1000, 600, 700]},
|
507
|
+
index: Daru::MultiIndex.from_tuples([
|
508
|
+
['Jane', 'July'],
|
509
|
+
['Jane', 'June'],
|
510
|
+
['John', 'July'],
|
511
|
+
['John', 'June'],
|
512
|
+
['Mark', 'July'],
|
513
|
+
['Mark', 'June']
|
514
|
+
])
|
515
|
+
)}
|
516
|
+
end
|
517
|
+
|
518
|
+
context 'group and aggregate sum and lambda function for vectors' do
|
519
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
520
|
+
salary: :sum,
|
521
|
+
month: ->(vec) { vec.to_a.join('/') }) }
|
522
|
+
|
523
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
524
|
+
salary: [1100, 2200, 1300],
|
525
|
+
month: ['June/July', 'June/July', 'June/July']},
|
526
|
+
index: ['Jane', 'John', 'Mark'],
|
527
|
+
order: [:salary, :month])
|
528
|
+
}
|
529
|
+
end
|
530
|
+
|
531
|
+
context 'group and aggregate sum and lambda functions on dataframe' do
|
532
|
+
subject { dataframe.group_by([:employee]).aggregate(
|
533
|
+
salary: :sum,
|
534
|
+
month: ->(vec) { vec.to_a.join('/') },
|
535
|
+
mean_salary: ->(df) { df.salary.mean },
|
536
|
+
periods: ->(df) { df.size }
|
537
|
+
)}
|
538
|
+
|
539
|
+
it { is_expected.to eq Daru::DataFrame.new({
|
540
|
+
salary: [1100, 2200, 1300],
|
541
|
+
month: ['June/July', 'June/July', 'June/July'],
|
542
|
+
mean_salary: [550.0, 1100.0, 650.0],
|
543
|
+
periods: [2, 2, 2]},
|
544
|
+
index: ['Jane', 'John', 'Mark'], order: [:salary, :month,
|
545
|
+
:mean_salary, :periods]) }
|
546
|
+
end
|
547
|
+
|
548
|
+
context 'group_by and aggregate on mixed MultiIndex' do
|
549
|
+
let(:df) { Daru::DataFrame.new(
|
550
|
+
name: ['Ram','Krishna','Ram','Krishna','Krishna'],
|
551
|
+
visited: [
|
552
|
+
'Hyderabad', 'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
553
|
+
)
|
554
|
+
}
|
555
|
+
let(:df_mixed) { Daru::DataFrame.new(
|
556
|
+
name: ['Krishna','Ram','Krishna','Krishna'],
|
557
|
+
visited: [
|
558
|
+
'Delhi', 'Mumbai', 'Raipur', 'Banglore']
|
559
|
+
)
|
560
|
+
}
|
561
|
+
it 'group_by' do
|
562
|
+
expect(df.group_by(:name).df).to eq(
|
563
|
+
Daru::DataFrame.new({
|
564
|
+
visited: ['Delhi', 'Raipur', 'Banglore', 'Hyderabad', 'Mumbai']},
|
565
|
+
index: Daru::MultiIndex.from_tuples(
|
566
|
+
[['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
|
567
|
+
['Ram', 0], ['Ram', 2]]
|
568
|
+
)
|
569
|
+
)
|
570
|
+
)
|
571
|
+
end
|
572
|
+
|
573
|
+
it 'group_by and aggregate' do
|
574
|
+
expect(
|
575
|
+
df.group_by(:name).aggregate(
|
576
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
577
|
+
Daru::DataFrame.new({
|
578
|
+
visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai']},
|
579
|
+
index: ['Krishna', 'Ram']
|
580
|
+
)
|
581
|
+
)
|
582
|
+
end
|
583
|
+
|
584
|
+
it 'group_by and aggregate when anyone index is not multiple times' do
|
585
|
+
expect(
|
586
|
+
df_mixed.group_by(:name).aggregate(
|
587
|
+
visited: -> (vec){vec.to_a.join(',')})).to eq(
|
588
|
+
Daru::DataFrame.new({
|
589
|
+
visited: ['Delhi,Raipur,Banglore', 'Mumbai']},
|
590
|
+
index: ['Krishna', 'Ram']
|
591
|
+
)
|
592
|
+
)
|
593
|
+
end
|
594
|
+
end
|
595
|
+
end
|
468
596
|
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -145,8 +145,15 @@ describe Daru::DataFrame do
|
|
145
145
|
end
|
146
146
|
|
147
147
|
context "#initialize" do
|
148
|
+
|
149
|
+
it "initializes an empty DataFrame with no arguments" do
|
150
|
+
df = Daru::DataFrame.new
|
151
|
+
expect(df.nrows).to eq(0)
|
152
|
+
expect(df.ncols).to eq(0)
|
153
|
+
end
|
154
|
+
|
148
155
|
context Daru::Index do
|
149
|
-
it "initializes an empty DataFrame" do
|
156
|
+
it "initializes an empty DataFrame with empty source arg" do
|
150
157
|
df = Daru::DataFrame.new({}, order: [:a, :b])
|
151
158
|
|
152
159
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
@@ -164,6 +171,13 @@ describe Daru::DataFrame do
|
|
164
171
|
expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
|
165
172
|
end
|
166
173
|
|
174
|
+
it "initializes from a Hash and preserves default order" do
|
175
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
176
|
+
index: [:one, :two, :three, :four, :five])
|
177
|
+
|
178
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
179
|
+
end
|
180
|
+
|
167
181
|
it "initializes from a Hash of Vectors" do
|
168
182
|
va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
|
169
183
|
vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
|
@@ -221,7 +235,7 @@ describe Daru::DataFrame do
|
|
221
235
|
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
|
222
236
|
|
223
237
|
expect(df.index) .to eq(Daru::Index.new [0,1,2,3,4])
|
224
|
-
expect(df.vectors).to eq(Daru::Index.new [:
|
238
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
225
239
|
end
|
226
240
|
|
227
241
|
it "aligns indexes properly" do
|
@@ -1573,6 +1587,22 @@ describe Daru::DataFrame do
|
|
1573
1587
|
}
|
1574
1588
|
end
|
1575
1589
|
|
1590
|
+
context 'with mulitiindex DF' do
|
1591
|
+
subject(:data_frame) {
|
1592
|
+
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
1593
|
+
c: [11,22,33]}, order: [:a, :b, :c],
|
1594
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
|
1595
|
+
}
|
1596
|
+
|
1597
|
+
before { data_frame.add_row [100,200,300], [:two, :five] }
|
1598
|
+
|
1599
|
+
it { is_expected.to eq(Daru::DataFrame.new({
|
1600
|
+
b: [11,12,13,200], a: [1,2,3,100],
|
1601
|
+
c: [11,22,33,300]}, order: [:a, :b, :c],
|
1602
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
|
1603
|
+
}
|
1604
|
+
end
|
1605
|
+
|
1576
1606
|
it "allows adding rows after making empty DF by specfying only order" do
|
1577
1607
|
df = Daru::DataFrame.new({}, order: [:a, :b, :c])
|
1578
1608
|
df.add_row [1,2,3]
|
@@ -1790,6 +1820,59 @@ describe Daru::DataFrame do
|
|
1790
1820
|
end
|
1791
1821
|
end
|
1792
1822
|
|
1823
|
+
describe 'uniq' do
|
1824
|
+
let(:df) do
|
1825
|
+
Daru::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
|
1826
|
+
end
|
1827
|
+
|
1828
|
+
context 'with no args' do
|
1829
|
+
it do
|
1830
|
+
result = df.uniq
|
1831
|
+
expect(result.shape.first).to eq 30
|
1832
|
+
end
|
1833
|
+
end
|
1834
|
+
|
1835
|
+
context 'given a vector' do
|
1836
|
+
it do
|
1837
|
+
result = df.uniq("color")
|
1838
|
+
expect(result.shape.first).to eq 2
|
1839
|
+
end
|
1840
|
+
end
|
1841
|
+
|
1842
|
+
context 'given an array of vectors' do
|
1843
|
+
it do
|
1844
|
+
result = df.uniq("color", "director_name")
|
1845
|
+
expect(result.shape.first).to eq 29
|
1846
|
+
end
|
1847
|
+
end
|
1848
|
+
end
|
1849
|
+
|
1850
|
+
context '#rolling_fillna!' do
|
1851
|
+
subject do
|
1852
|
+
Daru::DataFrame.new({
|
1853
|
+
a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
|
1854
|
+
b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
|
1855
|
+
c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
1856
|
+
})
|
1857
|
+
end
|
1858
|
+
|
1859
|
+
context 'rolling_fillna! forwards' do
|
1860
|
+
before { subject.rolling_fillna!(:forward) }
|
1861
|
+
it { is_expected.to be_a Daru::DataFrame }
|
1862
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
|
1863
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
|
1864
|
+
its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
|
1865
|
+
end
|
1866
|
+
|
1867
|
+
context 'rolling_fillna! backwards' do
|
1868
|
+
before { subject.rolling_fillna!(:backward) }
|
1869
|
+
it { is_expected.to be_a Daru::DataFrame }
|
1870
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
|
1871
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
|
1872
|
+
its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
|
1873
|
+
end
|
1874
|
+
end
|
1875
|
+
|
1793
1876
|
context "#clone" do
|
1794
1877
|
it "returns a view of the whole dataframe" do
|
1795
1878
|
cloned = @data_frame.clone
|
@@ -3160,6 +3243,15 @@ describe Daru::DataFrame do
|
|
3160
3243
|
it { expect(subject['a'].to_a).to eq [1, 2, 3] }
|
3161
3244
|
it { expect(subject[:b].to_a).to eq [4, 5, 6] }
|
3162
3245
|
end
|
3246
|
+
|
3247
|
+
context "preserves indices for dataframes with same index" do
|
3248
|
+
let(:index) { ['one','two','three'] }
|
3249
|
+
let(:df1) { Daru::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
|
3250
|
+
let(:df2) { Daru::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
|
3251
|
+
subject { df1.merge df2 }
|
3252
|
+
|
3253
|
+
its(:index) { is_expected.to eq Daru::Index.new(index) }
|
3254
|
+
end
|
3163
3255
|
end
|
3164
3256
|
|
3165
3257
|
context "#vector_by_calculation" do
|
@@ -3176,23 +3268,27 @@ describe Daru::DataFrame do
|
|
3176
3268
|
|
3177
3269
|
context "#vector_sum" do
|
3178
3270
|
before do
|
3179
|
-
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
|
3180
|
-
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
|
3181
|
-
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
|
3182
|
-
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
3271
|
+
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil, nil]
|
3272
|
+
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30, nil]
|
3273
|
+
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2, nil]
|
3274
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3, nil]
|
3183
3275
|
@df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
|
3184
3276
|
end
|
3185
3277
|
|
3186
3278
|
it "calculates complete vector sum" do
|
3187
|
-
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
|
3279
|
+
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil, nil])
|
3280
|
+
end
|
3281
|
+
|
3282
|
+
it "ignores nils if skipnil is true" do
|
3283
|
+
expect(@df.vector_sum skipnil: true).to eq(Daru::Vector.new [13, 15, 26, 25, 28, 35, 0])
|
3188
3284
|
end
|
3189
3285
|
|
3190
3286
|
it "calculates partial vector sum" do
|
3191
3287
|
a = @df.vector_sum([:a1, :a2])
|
3192
3288
|
b = @df.vector_sum([:b1, :b2])
|
3193
3289
|
|
3194
|
-
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
|
3195
|
-
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
|
3290
|
+
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil, nil])
|
3291
|
+
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5, nil])
|
3196
3292
|
end
|
3197
3293
|
end
|
3198
3294
|
|
@@ -3380,7 +3476,8 @@ describe Daru::DataFrame do
|
|
3380
3476
|
ev_b = Daru::Vector.new [1, 1, 0]
|
3381
3477
|
ev_c = Daru::Vector.new [0, 1, 1]
|
3382
3478
|
df2 = Daru::DataFrame.new({
|
3383
|
-
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }
|
3479
|
+
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
|
3480
|
+
order: ['a', 'b', 'c', :_id])
|
3384
3481
|
|
3385
3482
|
expect(df2).to eq(df)
|
3386
3483
|
end
|
@@ -3928,6 +4025,24 @@ describe Daru::DataFrame do
|
|
3928
4025
|
end
|
3929
4026
|
end
|
3930
4027
|
|
4028
|
+
context '#aggregate' do
|
4029
|
+
let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
|
4030
|
+
let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
|
4031
|
+
let(:df_cat_idx) {
|
4032
|
+
Daru::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
|
4033
|
+
|
4034
|
+
it 'lambda function on particular column' do
|
4035
|
+
expect(df.aggregate(num_100_times: ->(df) { df.num*100 })).to eq(
|
4036
|
+
Daru::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
|
4037
|
+
)
|
4038
|
+
end
|
4039
|
+
it 'aggregate sum on particular column' do
|
4040
|
+
expect(df_cat_idx.aggregate(num: :sum)).to eq(
|
4041
|
+
Daru::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
|
4042
|
+
)
|
4043
|
+
end
|
4044
|
+
end
|
4045
|
+
|
3931
4046
|
context '#create_sql' do
|
3932
4047
|
let(:df) { Daru::DataFrame.new({
|
3933
4048
|
a: [1,2,3],
|