daru 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +14 -0
- data/.travis.yml +26 -4
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +1 -2
- data/{History.txt → History.md} +110 -44
- data/README.md +21 -288
- data/Rakefile +1 -0
- data/daru.gemspec +12 -8
- data/lib/daru.rb +36 -1
- data/lib/daru/accessors/array_wrapper.rb +8 -3
- data/lib/daru/accessors/gsl_wrapper.rb +113 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
- data/lib/daru/core/group_by.rb +0 -1
- data/lib/daru/dataframe.rb +1192 -83
- data/lib/daru/extensions/rserve.rb +21 -0
- data/lib/daru/index.rb +14 -0
- data/lib/daru/io/io.rb +170 -8
- data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
- data/lib/daru/maths/arithmetic/vector.rb +4 -4
- data/lib/daru/maths/statistics/dataframe.rb +48 -27
- data/lib/daru/maths/statistics/vector.rb +215 -33
- data/lib/daru/monkeys.rb +53 -7
- data/lib/daru/multi_index.rb +21 -4
- data/lib/daru/plotting/dataframe.rb +83 -25
- data/lib/daru/plotting/vector.rb +9 -10
- data/lib/daru/vector.rb +596 -61
- data/lib/daru/version.rb +3 -0
- data/spec/accessors/wrappers_spec.rb +51 -0
- data/spec/core/group_by_spec.rb +0 -2
- data/spec/daru_spec.rb +58 -0
- data/spec/dataframe_spec.rb +768 -73
- data/spec/extensions/rserve_spec.rb +52 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/io/io_spec.rb +161 -24
- data/spec/math/arithmetic/dataframe_spec.rb +26 -7
- data/spec/math/arithmetic/vector_spec.rb +8 -0
- data/spec/math/statistics/dataframe_spec.rb +16 -1
- data/spec/math/statistics/vector_spec.rb +215 -47
- data/spec/spec_helper.rb +21 -2
- data/spec/vector_spec.rb +368 -12
- metadata +99 -16
- data/lib/version.rb +0 -3
- data/notebooks/grouping_splitting_pivots.ipynb +0 -529
- data/notebooks/intro_with_music_data_.ipynb +0 -303
data/lib/daru/version.rb
ADDED
data/spec/accessors/wrappers_spec.rb
CHANGED
@@ -33,4 +33,55 @@ end
|
|
33
33
|
|
34
34
|
describe Daru::Accessors::ArrayWrapper do
|
35
35
|
|
36
|
+
end
|
37
|
+
|
38
|
+
describe Daru::Accessors::GSLWrapper do
|
39
|
+
before :each do
|
40
|
+
@stub_context = Object.new
|
41
|
+
@gsl_wrapper = Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6], @stub_context)
|
42
|
+
end
|
43
|
+
|
44
|
+
context ".new" do
|
45
|
+
it "actually creates a GSL Vector" do
|
46
|
+
expect(@gsl_wrapper.data.class).to eq(GSL::Vector)
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
context "#mean" do
|
51
|
+
it "computes mean" do
|
52
|
+
expect(@gsl_wrapper.mean).to eq(3.5)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
context "#map!" do
|
57
|
+
it "destructively maps" do
|
58
|
+
expect(@gsl_wrapper.map! { |a| a += 1 }).to eq(
|
59
|
+
Daru::Accessors::GSLWrapper.new([2,3,4,5,6,7], @stub_context)
|
60
|
+
)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
context "#delete_at" do
|
65
|
+
it "deletes at key" do
|
66
|
+
expect(@gsl_wrapper.delete_at(2)).to eq(3)
|
67
|
+
|
68
|
+
expect(@gsl_wrapper).to eq(
|
69
|
+
Daru::Accessors::GSLWrapper.new([1,2,4,5,6], @stub_context)
|
70
|
+
)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
context "#index" do
|
75
|
+
it "returns index of value" do
|
76
|
+
expect(@gsl_wrapper.index(3)).to eq(2)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
context "#push" do
|
81
|
+
it "appends element" do
|
82
|
+
expect(@gsl_wrapper.push(15)).to eq(
|
83
|
+
Daru::Accessors::GSLWrapper.new([1,2,3,4,5,6,15], @stub_context)
|
84
|
+
)
|
85
|
+
end
|
86
|
+
end
|
36
87
|
end
|
data/spec/core/group_by_spec.rb
CHANGED
data/spec/daru_spec.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'spec_helper.rb'
|
2
|
+
|
3
|
+
describe "Daru.lazy_update" do
|
4
|
+
context "A variable which will set whether Vector metadata is updated immediately or lazily." do
|
5
|
+
describe Daru::Vector do
|
6
|
+
it "does updates metadata immediately when LAZY_UPDATE is set to default false" do
|
7
|
+
v = Daru::Vector.new [1,2,3,4,nil,nil,3,nil]
|
8
|
+
v[1] = nil
|
9
|
+
|
10
|
+
expect(v.missing_positions.include?(1)).to eq(true)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "does NOT update metadata immediately when @@lazy_update is set to default true. Update done when #update is called" do
|
14
|
+
Daru.lazy_update = true
|
15
|
+
v = Daru::Vector.new [1,2,3,4,nil,nil]
|
16
|
+
v[1] = nil
|
17
|
+
v[0] = nil
|
18
|
+
|
19
|
+
expect(v.missing_positions.include?(0)).to eq(false)
|
20
|
+
expect(v.missing_positions.include?(1)).to eq(false)
|
21
|
+
|
22
|
+
v.update
|
23
|
+
expect(v.missing_positions.include?(0)).to eq(true)
|
24
|
+
expect(v.missing_positions.include?(1)).to eq(true)
|
25
|
+
|
26
|
+
Daru.lazy_update = false
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe Daru::DataFrame do
|
31
|
+
before do
|
32
|
+
v = Daru::Vector.new [1,2,3,4,nil,nil,3,nil]
|
33
|
+
@df = Daru::DataFrame.new({a: v, b: v, c: v})
|
34
|
+
end
|
35
|
+
|
36
|
+
it "does updates metadata immediately when LAZY_UPDATE is set to default false" do
|
37
|
+
@df[:a][1] = nil
|
38
|
+
|
39
|
+
expect(@df[:a].missing_positions.include?(1)).to eq(true)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "does NOT update metadata immediately when @@lazy_update is set to default true. Update done when #update is called" do
|
43
|
+
Daru.lazy_update = true
|
44
|
+
@df[:c][0] = nil
|
45
|
+
@df[:a][1] = nil
|
46
|
+
|
47
|
+
expect(@df[:c].missing_positions.include?(0)).to eq(false)
|
48
|
+
expect(@df[:a].missing_positions.include?(1)).to eq(false)
|
49
|
+
|
50
|
+
@df.update
|
51
|
+
expect(@df[:c].missing_positions.include?(0)).to eq(true)
|
52
|
+
expect(@df[:a].missing_positions.include?(1)).to eq(true)
|
53
|
+
|
54
|
+
Daru.lazy_update = false
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -264,6 +264,37 @@ describe Daru::DataFrame do
|
|
264
264
|
expect(df.vectors).to eq([:a,:c,:b].to_index)
|
265
265
|
end
|
266
266
|
|
267
|
+
it "does not copy vectors when clone: false" do
|
268
|
+
a = Daru::Vector.new([1,2,3,4,5])
|
269
|
+
b = Daru::Vector.new([1,2,3,4,5])
|
270
|
+
c = Daru::Vector.new([1,2,3,4,5])
|
271
|
+
df = Daru::DataFrame.new({a: a, b: b, c: c}, clone: false)
|
272
|
+
|
273
|
+
expect(df[:a].object_id).to eq(a.object_id)
|
274
|
+
expect(df[:b].object_id).to eq(b.object_id)
|
275
|
+
expect(df[:c].object_id).to eq(c.object_id)
|
276
|
+
end
|
277
|
+
|
278
|
+
it "allows creation of empty dataframe with only order", focus: true do
|
279
|
+
df = Daru::DataFrame.new({}, order: [:a, :b, :c])
|
280
|
+
df[:a] = Daru::Vector.new([1,2,3,4,5,6])
|
281
|
+
|
282
|
+
expect(df.size).to eq(6)
|
283
|
+
expect(df[:a]).to eq(Daru::Vector.new([1,2,3,4,5,6]))
|
284
|
+
expect(df[:b]).to eq(Daru::Vector.new([nil,nil,nil,nil,nil,nil]))
|
285
|
+
expect(df[:c]).to eq(Daru::Vector.new([nil,nil,nil,nil,nil,nil]))
|
286
|
+
end
|
287
|
+
|
288
|
+
it "allows creation of dataframe without specifying order or index" do
|
289
|
+
df = Daru::DataFrame.new({})
|
290
|
+
df[:a] = Daru::Vector.new([1,2,3,4,5])
|
291
|
+
|
292
|
+
expect(df.size) .to eq(5)
|
293
|
+
expect(df.index.to_a) .to eq([0,1,2,3,4])
|
294
|
+
expect(df.vectors.to_a).to eq([:a])
|
295
|
+
expect(df[:a]) .to eq(Daru::Vector.new([1,2,3,4,5]))
|
296
|
+
end
|
297
|
+
|
267
298
|
it "raises error for incomplete DataFrame index" do
|
268
299
|
expect {
|
269
300
|
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
@@ -390,6 +421,14 @@ describe Daru::DataFrame do
|
|
390
421
|
it "accesses vector with Integer index" do
|
391
422
|
expect(@df[0, :vector]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
|
392
423
|
end
|
424
|
+
|
425
|
+
it "returns a subset of DataFrame when specified range" do
|
426
|
+
subset = @df[:b..:c]
|
427
|
+
expect(subset).to eq(Daru::DataFrame.new({
|
428
|
+
b: [11,12,13,14,15],
|
429
|
+
c: [11,22,33,44,55]
|
430
|
+
}, index: [:one, :two, :three, :four, :five]))
|
431
|
+
end
|
393
432
|
end
|
394
433
|
|
395
434
|
context Daru::MultiIndex do
|
@@ -501,7 +540,42 @@ describe Daru::DataFrame do
|
|
501
540
|
end
|
502
541
|
|
503
542
|
context Daru::MultiIndex do
|
504
|
-
|
543
|
+
it "raises error when incomplete index specified but index is absent" do
|
544
|
+
expect {
|
545
|
+
@df_mi[:d] = [100,200,300,400,100,200,300,400,100,200,300,400]
|
546
|
+
}.to raise_error
|
547
|
+
end
|
548
|
+
|
549
|
+
it "assigns all sub-indexes when a top level index is specified" do
|
550
|
+
pending
|
551
|
+
@df_mi[:a] = [100,200,300,400,100,200,300,400,100,200,300,400]
|
552
|
+
|
553
|
+
expect(@df_mi).to eq(Daru::DataFrame.new([
|
554
|
+
[100,200,300,400,100,200,300,400,100,200,300,400],
|
555
|
+
[100,200,300,400,100,200,300,400,100,200,300,400],
|
556
|
+
@vector_arry1,
|
557
|
+
@vector_arry2], index: @multi_index, order: @order_mi))
|
558
|
+
end
|
559
|
+
|
560
|
+
it "creates a new vector when full index specfied" do
|
561
|
+
pending
|
562
|
+
order = Daru::MultiIndex.new([
|
563
|
+
[:a,:one,:bar],
|
564
|
+
[:a,:two,:baz],
|
565
|
+
[:b,:two,:foo],
|
566
|
+
[:b,:one,:foo],
|
567
|
+
[:c,:one,:bar]])
|
568
|
+
answer = Daru::DataFrame.new([
|
569
|
+
@vector_arry1,
|
570
|
+
@vector_arry2,
|
571
|
+
@vector_arry1,
|
572
|
+
@vector_arry2,
|
573
|
+
[100,200,300,400,100,200,300,400,100,200,300,400]
|
574
|
+
], index: @multi_index, order: order)
|
575
|
+
@df_mi[:c,:one,:bar] = [100,200,300,400,100,200,300,400,100,200,300,400]
|
576
|
+
|
577
|
+
expect(@df_mi).to eq(answer)
|
578
|
+
end
|
505
579
|
end
|
506
580
|
end
|
507
581
|
|
@@ -551,6 +625,12 @@ describe Daru::DataFrame do
|
|
551
625
|
expect(@df.row[:two]).to eq([2,9,11].dv(:two, [:a, :b, :c]))
|
552
626
|
end
|
553
627
|
|
628
|
+
it "correctlu aligns assinged DV by index for new rows" do
|
629
|
+
@df.row[:latest] = Daru::Vector.new([2,3,1], index: [:b,:c,:a])
|
630
|
+
|
631
|
+
expect(@df.row[:latest]).to eq(Daru::Vector.new([1,2,3], index: [:a,:b,:c]))
|
632
|
+
end
|
633
|
+
|
554
634
|
it "inserts nils for indexes that dont exist in the DataFrame" do
|
555
635
|
@df.row[:two] = [49, 99, 59].dv(nil, [:oo, :aah, :gaah])
|
556
636
|
|
@@ -667,6 +747,19 @@ describe Daru::DataFrame do
|
|
667
747
|
end
|
668
748
|
end
|
669
749
|
|
750
|
+
context "#add_row" do
|
751
|
+
it "allows adding rows after making empty DF by specfying only order" do
|
752
|
+
df = Daru::DataFrame.new({}, order: [:a, :b, :c])
|
753
|
+
df.add_row [1,2,3]
|
754
|
+
df.add_row [5,6,7]
|
755
|
+
|
756
|
+
expect(df[:a]).to eq(Daru::Vector.new([1,5]))
|
757
|
+
expect(df[:b]).to eq(Daru::Vector.new([2,6]))
|
758
|
+
expect(df[:c]).to eq(Daru::Vector.new([3,7]))
|
759
|
+
expect(df.index).to eq(Daru::Index.new([0,1]))
|
760
|
+
end
|
761
|
+
end
|
762
|
+
|
670
763
|
context "#vector" do
|
671
764
|
context Daru::Index do
|
672
765
|
it "appends an Array as a Daru::Vector" do
|
@@ -722,38 +815,89 @@ describe Daru::DataFrame do
|
|
722
815
|
end
|
723
816
|
|
724
817
|
context "#dup" do
|
725
|
-
|
726
|
-
|
818
|
+
context Daru::Index do
|
819
|
+
it "dups every data structure inside DataFrame" do
|
820
|
+
clo = @data_frame.dup
|
727
821
|
|
728
|
-
|
729
|
-
|
730
|
-
|
822
|
+
expect(clo.object_id) .not_to eq(@data_frame.object_id)
|
823
|
+
expect(clo.vectors.object_id).not_to eq(@data_frame.object_id)
|
824
|
+
expect(clo.index.object_id) .not_to eq(@data_frame.object_id)
|
731
825
|
|
732
|
-
|
733
|
-
|
826
|
+
@data_frame.each_vector_with_index do |vector, index|
|
827
|
+
expect(vector.object_id).not_to eq(clo.vector[index].object_id)
|
828
|
+
end
|
734
829
|
end
|
735
830
|
end
|
736
|
-
end
|
737
831
|
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
ret = @data_frame.each_vector do |vector|
|
742
|
-
expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
|
743
|
-
expect(vector.class).to eq(Daru::Vector)
|
744
|
-
end
|
832
|
+
context Daru::MultiIndex do
|
833
|
+
it "duplicates with multi index" do
|
834
|
+
clo = @df_mi.dup
|
745
835
|
|
746
|
-
expect(
|
836
|
+
expect(clo) .to eq(@df_mi)
|
837
|
+
expect(clo.vectors.object_id).not_to eq(@df_mi.vectors.object_id)
|
838
|
+
expect(clo.index.object_id) .not_to eq(@df_mi.index.object_id)
|
747
839
|
end
|
840
|
+
end
|
841
|
+
end
|
748
842
|
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
843
|
+
context "#dup_only_valid" do
|
844
|
+
before do
|
845
|
+
@missing_data_df = Daru::DataFrame.new({
|
846
|
+
a: [1 , 2, 3, nil, 4, nil, 5],
|
847
|
+
b: [nil, 2, 3, nil, 4, nil, 5],
|
848
|
+
c: [1, 2, 3, 43 , 4, nil, 5]
|
849
|
+
})
|
753
850
|
end
|
754
851
|
|
755
|
-
|
852
|
+
it "dups rows with non-missing data only" do
|
853
|
+
df = Daru::DataFrame.new({
|
854
|
+
a: [2, 3, 4, 5],
|
855
|
+
b: [2, 3, 4, 5],
|
856
|
+
c: [2, 3, 4, 5]
|
857
|
+
}, index: [1,2,4,6])
|
858
|
+
expect(@missing_data_df.dup_only_valid).to eq(df)
|
859
|
+
end
|
860
|
+
|
861
|
+
it "dups only the specified vectors" do
|
862
|
+
df = Daru::DataFrame.new({
|
863
|
+
a: [2,3,4,5],
|
864
|
+
c: [2,3,4,5]
|
865
|
+
}, index: [1,2,4,6])
|
866
|
+
expect(@missing_data_df.dup_only_valid([:a, :c])).to eq(df)
|
867
|
+
end
|
868
|
+
end
|
869
|
+
|
870
|
+
context "#clone" do
|
871
|
+
it "returns a view of the whole dataframe" do
|
872
|
+
cloned = @data_frame.clone
|
873
|
+
expect(@data_frame.object_id).to_not eq(cloned.object_id)
|
874
|
+
expect(@data_frame[:a].object_id).to eq(cloned[:a].object_id)
|
875
|
+
expect(@data_frame[:b].object_id).to eq(cloned[:b].object_id)
|
876
|
+
expect(@data_frame[:c].object_id).to eq(cloned[:c].object_id)
|
877
|
+
end
|
878
|
+
|
879
|
+
it "returns a view of selected vectors" do
|
880
|
+
cloned = @data_frame.clone(:a, :b)
|
881
|
+
expect(cloned.object_id).to_not eq(@data_frame.object_id)
|
882
|
+
expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
|
883
|
+
expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
|
884
|
+
end
|
756
885
|
|
886
|
+
it "clones properly when supplied array" do
|
887
|
+
cloned = @data_frame.clone([:a, :b])
|
888
|
+
expect(cloned.object_id).to_not eq(@data_frame.object_id)
|
889
|
+
expect(cloned[:a].object_id).to eq(@data_frame[:a].object_id)
|
890
|
+
expect(cloned[:b].object_id).to eq(@data_frame[:b].object_id)
|
891
|
+
end
|
892
|
+
end
|
893
|
+
|
894
|
+
context "#clone_structure" do
|
895
|
+
it "clones only the index and vector structures of the data frame" do
|
896
|
+
cs = @data_frame.clone_structure
|
897
|
+
|
898
|
+
expect(cs.vectors).to eq(@data_frame.vectors)
|
899
|
+
expect(cs.index).to eq(@data_frame.index)
|
900
|
+
expect(cs[:a]).to eq(Daru::Vector.new([nil] * cs[:a].size, index: @data_frame.index))
|
757
901
|
end
|
758
902
|
end
|
759
903
|
|
@@ -772,17 +916,6 @@ describe Daru::DataFrame do
|
|
772
916
|
end
|
773
917
|
end
|
774
918
|
|
775
|
-
context "#each_row" do
|
776
|
-
it "iterates over rows" do
|
777
|
-
ret = @data_frame.each_row do |row|
|
778
|
-
expect(row.index).to eq([:a, :b, :c].to_index)
|
779
|
-
expect(row.class).to eq(Daru::Vector)
|
780
|
-
end
|
781
|
-
|
782
|
-
expect(ret).to eq(@data_frame)
|
783
|
-
end
|
784
|
-
end
|
785
|
-
|
786
919
|
context "#each_row_with_index" do
|
787
920
|
it "iterates over rows with indexes" do
|
788
921
|
idxs = []
|
@@ -797,81 +930,154 @@ describe Daru::DataFrame do
|
|
797
930
|
end
|
798
931
|
end
|
799
932
|
|
800
|
-
context "#
|
801
|
-
it "iterates over
|
802
|
-
|
803
|
-
|
804
|
-
|
933
|
+
context "#each" do
|
934
|
+
it "iterates over rows" do
|
935
|
+
ret = @data_frame.each(:row) do |row|
|
936
|
+
expect(row.index).to eq([:a, :b, :c].to_index)
|
937
|
+
expect(row.class).to eq(Daru::Vector)
|
938
|
+
end
|
805
939
|
|
806
|
-
ret
|
807
|
-
|
940
|
+
expect(ret).to eq(@data_frame)
|
941
|
+
end
|
942
|
+
|
943
|
+
it "iterates over all vectors" do
|
944
|
+
ret = @data_frame.each do |vector|
|
945
|
+
expect(vector.index).to eq([:one, :two, :three, :four, :five].to_index)
|
946
|
+
expect(vector.class).to eq(Daru::Vector)
|
808
947
|
end
|
809
948
|
|
810
|
-
expect(ret).to eq(
|
811
|
-
|
949
|
+
expect(ret).to eq(@data_frame)
|
950
|
+
end
|
951
|
+
|
952
|
+
it "returns Enumerable if no block specified" do
|
953
|
+
ret = @data_frame.each
|
954
|
+
expect(ret.is_a?(Enumerator)).to eq(true)
|
812
955
|
end
|
813
956
|
end
|
814
957
|
|
815
|
-
context "#
|
816
|
-
|
817
|
-
|
958
|
+
context "#recode" do
|
959
|
+
before do
|
960
|
+
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
818
961
|
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
819
962
|
index: [:one, :two, :three, :four, :five])
|
820
963
|
|
821
|
-
@
|
964
|
+
@ans_rows = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
965
|
+
c: [121, 484, 1089, 1936, 3025]}, order: [:a, :b, :c],
|
966
|
+
index: [:one, :two, :three, :four, :five])
|
967
|
+
end
|
968
|
+
|
969
|
+
it "maps over the vectors of a DataFrame and returns a DataFrame" do
|
970
|
+
ret = @data_frame.recode do |vector|
|
822
971
|
vector.map! { |e| e += 10}
|
823
972
|
end
|
824
973
|
|
825
|
-
expect(
|
974
|
+
expect(ret).to eq(@ans_vector)
|
975
|
+
end
|
976
|
+
|
977
|
+
it "maps over the rows of a DataFrame and returns a DataFrame" do
|
978
|
+
ret = @data_frame.recode(:row) do |row|
|
979
|
+
expect(row.class).to eq(Daru::Vector)
|
980
|
+
row.map! { |e| e*e }
|
981
|
+
end
|
982
|
+
|
983
|
+
expect(ret).to eq(@ans_rows)
|
826
984
|
end
|
827
985
|
end
|
828
986
|
|
829
|
-
context "#
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
987
|
+
context "#collect" do
|
988
|
+
before do
|
989
|
+
@df = Daru::DataFrame.new({
|
990
|
+
a: [1,2,3,4,5],
|
991
|
+
b: [11,22,33,44,55],
|
992
|
+
c: [1,2,3,4,5]
|
993
|
+
})
|
994
|
+
end
|
834
995
|
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
996
|
+
it "collects calculation over rows and returns a Vector from the results" do
|
997
|
+
expect(@df.collect(:row) { |row| (row[:a] + row[:c]) * row[:c] }).to eq(
|
998
|
+
Daru::Vector.new([2,8,18,32,50])
|
999
|
+
)
|
1000
|
+
end
|
1001
|
+
|
1002
|
+
it "collects calculation over vectors and returns a Vector from the results" do
|
1003
|
+
expect(@df.collect { |v| v[0] * v[1] + v[4] }).to eq(
|
1004
|
+
Daru::Vector.new([7,297,7], index: [:a, :b, :c])
|
1005
|
+
)
|
1006
|
+
end
|
1007
|
+
end
|
1008
|
+
|
1009
|
+
context "#map" do
|
1010
|
+
it "iterates over rows and returns an Array" do
|
1011
|
+
ret = @data_frame.map(:row) do |row|
|
1012
|
+
expect(row.class).to eq(Daru::Vector)
|
1013
|
+
row[:a] * row[:c]
|
839
1014
|
end
|
840
1015
|
|
841
|
-
expect(ret).to eq(
|
842
|
-
expect(
|
1016
|
+
expect(ret).to eq([11, 44, 99, 176, 275])
|
1017
|
+
expect(@data_frame.vectors.to_a).to eq([:a, :b, :c])
|
1018
|
+
end
|
1019
|
+
|
1020
|
+
it "iterates over vectors and returns an Array" do
|
1021
|
+
ret = @data_frame.map do |vector|
|
1022
|
+
vector.mean
|
1023
|
+
end
|
1024
|
+
expect(ret).to eq([3.0, 13.0, 33.0])
|
843
1025
|
end
|
844
1026
|
end
|
845
1027
|
|
846
|
-
context "#
|
847
|
-
|
848
|
-
|
849
|
-
c: [
|
1028
|
+
context "#map!" do
|
1029
|
+
before do
|
1030
|
+
@ans_vector = Daru::DataFrame.new({b: [21,22,23,24,25], a: [11,12,13,14,15],
|
1031
|
+
c: [21,32,43,54,65]}, order: [:a, :b, :c],
|
850
1032
|
index: [:one, :two, :three, :four, :five])
|
851
1033
|
|
852
|
-
|
853
|
-
|
854
|
-
|
1034
|
+
@ans_row = Daru::DataFrame.new({b: [12,13,14,15,16], a: [2,3,4,5,6],
|
1035
|
+
c: [12,23,34,45,56]}, order: [:a, :b, :c],
|
1036
|
+
index: [:one, :two, :three, :four, :five])
|
1037
|
+
end
|
1038
|
+
|
1039
|
+
it "destructively maps over the vectors and changes the DF" do
|
1040
|
+
@data_frame.map! do |vector|
|
1041
|
+
vector + 10
|
1042
|
+
end
|
1043
|
+
expect(@data_frame).to eq(@ans_vector)
|
1044
|
+
end
|
1045
|
+
|
1046
|
+
it "destructively maps over the rows and changes the DF" do
|
1047
|
+
@data_frame.map!(:row) do |row|
|
1048
|
+
row + 1
|
855
1049
|
end
|
856
1050
|
|
857
|
-
expect(
|
1051
|
+
expect(@data_frame).to eq(@ans_row)
|
1052
|
+
end
|
1053
|
+
end
|
1054
|
+
|
1055
|
+
context "#map_vectors_with_index" do
|
1056
|
+
it "iterates over vectors with index and returns an Array" do
|
1057
|
+
idx = []
|
1058
|
+
ret = @data_frame.map_vectors_with_index do |vector, index|
|
1059
|
+
idx << index
|
1060
|
+
vector.recode { |e| e += 10}
|
1061
|
+
end
|
1062
|
+
|
1063
|
+
expect(ret).to eq([
|
1064
|
+
Daru::Vector.new([11,12,13,14,15],index: [:one, :two, :three, :four, :five]),
|
1065
|
+
Daru::Vector.new([21,22,23,24,25],index: [:one, :two, :three, :four, :five]),
|
1066
|
+
Daru::Vector.new([21,32,43,54,65],index: [:one, :two, :three, :four, :five])])
|
1067
|
+
expect(idx).to eq([:a, :b, :c])
|
858
1068
|
end
|
859
1069
|
end
|
860
1070
|
|
861
1071
|
context "#map_rows_with_index" do
|
862
1072
|
it "iterates over rows with index and returns a modified DataFrame" do
|
863
|
-
ans = Daru::DataFrame.new({b: [121, 144, 169, 196, 225], a: [1,4,9,16,25],
|
864
|
-
c: [121, 484, 1089, 1936, 3025]},order: [:a, :b, :c],
|
865
|
-
index: [:one, :two, :three, :four, :five])
|
866
|
-
|
867
1073
|
idx = []
|
868
1074
|
ret = @data_frame.map_rows_with_index do |row, index|
|
869
1075
|
idx << index
|
870
1076
|
expect(row.class).to eq(Daru::Vector)
|
871
|
-
row
|
1077
|
+
row[:a] * row[:c]
|
872
1078
|
end
|
873
1079
|
|
874
|
-
expect(ret).to eq(
|
1080
|
+
expect(ret).to eq([11, 44, 99, 176, 275])
|
875
1081
|
expect(idx).to eq([:one, :two, :three, :four, :five])
|
876
1082
|
end
|
877
1083
|
end
|
@@ -926,6 +1132,23 @@ describe Daru::DataFrame do
|
|
926
1132
|
end
|
927
1133
|
end
|
928
1134
|
|
1135
|
+
context "#filter_field" do
|
1136
|
+
before do
|
1137
|
+
@df = Daru::DataFrame.new({
|
1138
|
+
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1139
|
+
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1140
|
+
:age => Daru::Vector.new([20, 23, 25, 27, 5]),
|
1141
|
+
:city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
1142
|
+
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
1143
|
+
order: [:id, :name, :age, :city, :a1])
|
1144
|
+
end
|
1145
|
+
|
1146
|
+
it "creates new vector with the data of a given field for which block returns true" do
|
1147
|
+
filtered = @df.filter_vector(:id) { |c| c[:id] == 2 or c[:id] == 4 }
|
1148
|
+
expect(filtered).to eq(Daru::Vector.new([2,4]))
|
1149
|
+
end
|
1150
|
+
end
|
1151
|
+
|
929
1152
|
context "#filter_rows" do
|
930
1153
|
context Daru::Index do
|
931
1154
|
it "filters rows" do
|
@@ -988,6 +1211,21 @@ describe Daru::DataFrame do
|
|
988
1211
|
end
|
989
1212
|
end
|
990
1213
|
|
1214
|
+
context "#to_hash" do
|
1215
|
+
it "converts to a hash" do
|
1216
|
+
expect(@data_frame.to_hash).to eq(
|
1217
|
+
{
|
1218
|
+
a: Daru::Vector.new([1,2,3,4,5],
|
1219
|
+
index: [:one, :two, :three, :four, :five]),
|
1220
|
+
b: Daru::Vector.new([11,12,13,14,15],
|
1221
|
+
index: [:one, :two, :three, :four, :five]),
|
1222
|
+
c: Daru::Vector.new([11,22,33,44,55],
|
1223
|
+
index: [:one, :two, :three, :four, :five])
|
1224
|
+
}
|
1225
|
+
)
|
1226
|
+
end
|
1227
|
+
end
|
1228
|
+
|
991
1229
|
context "#recast" do
|
992
1230
|
it "recasts underlying vectors" do
|
993
1231
|
@data_frame.recast a: :nmatrix, c: :nmatrix
|
@@ -1137,6 +1375,28 @@ describe Daru::DataFrame do
|
|
1137
1375
|
end
|
1138
1376
|
end
|
1139
1377
|
|
1378
|
+
context "#reindex_vectors!" do
|
1379
|
+
before :each do
|
1380
|
+
@df = Daru::DataFrame.new({
|
1381
|
+
a: [1,2,3,4,5],
|
1382
|
+
b: [11,22,33,44,55],
|
1383
|
+
c: %w(a b c d e)
|
1384
|
+
})
|
1385
|
+
end
|
1386
|
+
|
1387
|
+
it "changes names of vectors" do
|
1388
|
+
ans = Daru::DataFrame.new({
|
1389
|
+
a: [1,2,3,4,5],
|
1390
|
+
b: [11,22,33,44,55],
|
1391
|
+
c: %w(a b c d e)
|
1392
|
+
}, order: [:b, :c, :a])
|
1393
|
+
|
1394
|
+
expect(@df.reindex_vectors!([:b,:c,:a])).to eq(Daru::Index.new([:b,:c,:a], [1,2,0]))
|
1395
|
+
expect(@df[:a]).to eq(Daru::Vector.new([1,2,3,4,5]))
|
1396
|
+
expect(@df[:c]).to eq(Daru::Vector.new(%w(a b c d e)))
|
1397
|
+
end
|
1398
|
+
end
|
1399
|
+
|
1140
1400
|
context "#to_matrix" do
|
1141
1401
|
before do
|
1142
1402
|
@df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
@@ -1292,7 +1552,7 @@ describe Daru::DataFrame do
|
|
1292
1552
|
))
|
1293
1553
|
end
|
1294
1554
|
|
1295
|
-
it "creates row and vector index with (double) index and (double) vector args"
|
1555
|
+
it "creates row and vector index with (double) index and (double) vector args" do
|
1296
1556
|
agg_index = Daru::MultiIndex.new([
|
1297
1557
|
[:bar, 4],
|
1298
1558
|
[:bar, 5],
|
@@ -1394,4 +1654,439 @@ describe Daru::DataFrame do
|
|
1394
1654
|
expect(@data_frame.shape).to eq([5,3])
|
1395
1655
|
end
|
1396
1656
|
end
|
1657
|
+
|
1658
|
+
context "#nest" do
|
1659
|
+
it "nests in a hash" do
|
1660
|
+
df = Daru::DataFrame.new({
|
1661
|
+
:a => Daru::Vector.new(%w(a a a b b b)),
|
1662
|
+
:b => Daru::Vector.new(%w(c c d d e e)),
|
1663
|
+
:c => Daru::Vector.new(%w(f g h i j k))
|
1664
|
+
})
|
1665
|
+
nest = df.nest :a, :b
|
1666
|
+
expect(nest['a']['c']).to eq([{ :c => 'f' }, { :c => 'g' }])
|
1667
|
+
expect(nest['a']['d']).to eq([{ :c => 'h' }])
|
1668
|
+
expect(nest['b']['e']).to eq([{ :c => 'j' }, { :c => 'k' }])
|
1669
|
+
end
|
1670
|
+
end
|
1671
|
+
|
1672
|
+
context "#summary" do
|
1673
|
+
it "produces a summary of data frame" do
|
1674
|
+
expect(@data_frame.summary.match("#{@data_frame.name}")).to_not eq(nil)
|
1675
|
+
expect(@df_mi.summary.match("#{@df_mi.name}")).to_not eq(nil)
|
1676
|
+
end
|
1677
|
+
end
|
1678
|
+
|
1679
|
+
context "#to_gsl" do
|
1680
|
+
it "converts to GSL::Matrix" do
|
1681
|
+
rows = [[1,2,3,4,5],[11,12,13,14,15],[11,22,33,44,55]].transpose
|
1682
|
+
mat = GSL::Matrix.alloc *rows
|
1683
|
+
expect(@data_frame.to_gsl).to eq(mat)
|
1684
|
+
end
|
1685
|
+
end
|
1686
|
+
|
1687
|
+
context "#merge" do
|
1688
|
+
it "merges one dataframe with another" do
|
1689
|
+
a = Daru::Vector.new [1, 2, 3]
|
1690
|
+
b = Daru::Vector.new [3, 4, 5]
|
1691
|
+
c = Daru::Vector.new [4, 5, 6]
|
1692
|
+
d = Daru::Vector.new [7, 8, 9]
|
1693
|
+
e = Daru::Vector.new [10, 20, 30]
|
1694
|
+
ds1 = Daru::DataFrame.new({ :a => a, :b => b })
|
1695
|
+
ds2 = Daru::DataFrame.new({ :c => c, :d => d })
|
1696
|
+
exp = Daru::DataFrame.new({ :a => a, :b => b, :c => c, :d => d })
|
1697
|
+
|
1698
|
+
expect(ds1.merge(ds2)).to eq(exp)
|
1699
|
+
expect(ds2.merge(ds1)).to eq(
|
1700
|
+
Daru::DataFrame.new({c: c, d: d, a: a, b: b}, order: [:c, :d, :a, :b]))
|
1701
|
+
|
1702
|
+
ds3 = Daru::DataFrame.new({ :a => e })
|
1703
|
+
exp = Daru::DataFrame.new({ :a_1 => a, :a_2 => e, :b => b },
|
1704
|
+
order: [:a_1, :b, :a_2])
|
1705
|
+
|
1706
|
+
expect(ds1.merge(ds3)).to eq(exp)
|
1707
|
+
end
|
1708
|
+
end
|
1709
|
+
|
1710
|
+
context "#vector_by_calculation" do
|
1711
|
+
it "DSL for returning vector of each calculation" do
|
1712
|
+
a1 = Daru::Vector.new([1, 2, 3, 4, 5, 6, 7])
|
1713
|
+
a2 = Daru::Vector.new([10, 20, 30, 40, 50, 60, 70])
|
1714
|
+
a3 = Daru::Vector.new([100, 200, 300, 400, 500, 600, 700])
|
1715
|
+
ds = Daru::DataFrame.new({ :a => a1, :b => a2, :c => a3 })
|
1716
|
+
total = ds.vector_by_calculation { a + b + c }
|
1717
|
+
expected = Daru::Vector.new([111, 222, 333, 444, 555, 666, 777])
|
1718
|
+
expect(total).to eq(expected)
|
1719
|
+
end
|
1720
|
+
end
|
1721
|
+
|
1722
|
+
context "#vector_sum" do
|
1723
|
+
before do
|
1724
|
+
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
|
1725
|
+
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
|
1726
|
+
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
|
1727
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
1728
|
+
@df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
|
1729
|
+
end
|
1730
|
+
|
1731
|
+
it "calculates complete vector sum" do
|
1732
|
+
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
|
1733
|
+
end
|
1734
|
+
|
1735
|
+
it "calculates partial vector sum" do
|
1736
|
+
a = @df.vector_sum([:a1, :a2])
|
1737
|
+
b = @df.vector_sum([:b1, :b2])
|
1738
|
+
|
1739
|
+
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
|
1740
|
+
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
|
1741
|
+
end
|
1742
|
+
end
|
1743
|
+
|
1744
|
+
context "#missing_values_rows" do
|
1745
|
+
it "returns number of missing values in each row" do
|
1746
|
+
a1 = Daru::Vector.new [1, nil, 3, 4, 5, nil]
|
1747
|
+
a2 = Daru::Vector.new [10, nil, 20, 20, 20, 30]
|
1748
|
+
b1 = Daru::Vector.new [nil, nil, 1, 1, 1, 2]
|
1749
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
1750
|
+
c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
|
1751
|
+
df = Daru::DataFrame.new({
|
1752
|
+
:a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
|
1753
|
+
|
1754
|
+
expect(df.missing_values_rows).to eq(Daru::Vector.new [2, 3, 0, 1, 0, 1])
|
1755
|
+
end
|
1756
|
+
end
|
1757
|
+
|
1758
|
+
context "#vector_count_characters" do
|
1759
|
+
it "" do
|
1760
|
+
a1 = Daru::Vector.new( [1, 'abcde', 3, 4, 5, nil])
|
1761
|
+
a2 = Daru::Vector.new( [10, 20.3, 20, 20, 20, 30])
|
1762
|
+
b1 = Daru::Vector.new( [nil, '343434', 1, 1, 1, 2])
|
1763
|
+
b2 = Daru::Vector.new( [2, 2, 2, nil, 2, 3])
|
1764
|
+
c = Daru::Vector.new([nil, 2, 'This is a nice example', 2, 2, 2])
|
1765
|
+
ds = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
|
1766
|
+
|
1767
|
+
expect(ds.vector_count_characters).to eq(Daru::Vector.new([4, 17, 27, 5, 6, 5]))
|
1768
|
+
end
|
1769
|
+
end
|
1770
|
+
|
1771
|
+
|
1772
|
+
context "has_missing_data?" do
|
1773
|
+
before do
|
1774
|
+
a1 = Daru::Vector.new [1, nil, 3, 4, 5, nil]
|
1775
|
+
a2 = Daru::Vector.new [10, nil, 20, 20, 20, 30]
|
1776
|
+
b1 = Daru::Vector.new [nil, nil, 1, 1, 1, 2]
|
1777
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
1778
|
+
c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
|
1779
|
+
@df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
|
1780
|
+
end
|
1781
|
+
|
1782
|
+
it "returns true when missing data present" do
|
1783
|
+
expect(@df.has_missing_data?).to eq(true)
|
1784
|
+
end
|
1785
|
+
|
1786
|
+
it "returns false when no missing data prensent" do
|
1787
|
+
a = @df.dup_only_valid
|
1788
|
+
expect(a.has_missing_data?).to eq(false)
|
1789
|
+
end
|
1790
|
+
end
|
1791
|
+
|
1792
|
+
context "#vector_mean" do
|
1793
|
+
before do
|
1794
|
+
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
|
1795
|
+
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
|
1796
|
+
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
|
1797
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
1798
|
+
c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
|
1799
|
+
@df = Daru::DataFrame.new({
|
1800
|
+
:a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
|
1801
|
+
end
|
1802
|
+
|
1803
|
+
it "calculates complete vector mean" do
|
1804
|
+
expect(@df.vector_mean).to eq(
|
1805
|
+
Daru::Vector.new [nil, 3.4, 6, nil, 6.0, nil])
|
1806
|
+
end
|
1807
|
+
end
|
1808
|
+
|
1809
|
+
context "#add_vectors_by_split_recode" do
|
1810
|
+
before do
|
1811
|
+
@ds = Daru::DataFrame.new({
|
1812
|
+
:id => Daru::Vector.new([1, 2, 3, 4, 5]),
|
1813
|
+
:name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
|
1814
|
+
:age => Daru::Vector.new([20, 23, 25, 27, 5]),
|
1815
|
+
:city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
|
1816
|
+
:a1 => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c']) },
|
1817
|
+
order: [:id, :name, :age, :city, :a1])
|
1818
|
+
end
|
1819
|
+
|
1820
|
+
it "" do
|
1821
|
+
@ds.add_vectors_by_split_recode(:a1, '_')
|
1822
|
+
expect(@ds.vectors.to_a) .to eq([:id, :name, :age, :city ,:a1, :a1_1, :a1_2, :a1_3])
|
1823
|
+
expect(@ds[:a1_1].to_a).to eq([1, 0, 1, nil, 1])
|
1824
|
+
expect(@ds[:a1_2].to_a).to eq([1, 1, 0, nil, 1])
|
1825
|
+
expect(@ds[:a1_3].to_a).to eq([0, 1, 0, nil, 1])
|
1826
|
+
end
|
1827
|
+
end
|
1828
|
+
|
1829
|
+
# DataFrame#add_vectors_by_split. The :a1 vector holds comma-separated
# tokens; the call is expected to append one vector per distinct token, named
# "<source>_<token>" with the given join string ('_'), holding 1/0 for
# presence/absence and nil where the source value is nil.
context "#add_vectors_by_split" do
  before do
    @ds = Daru::DataFrame.new({
      :id   => Daru::Vector.new([1, 2, 3, 4, 5]),
      :name => Daru::Vector.new(%w(Alex Claude Peter Franz George)),
      :age  => Daru::Vector.new([20, 23, 25, 27, 5]),
      :city => Daru::Vector.new(['New York', 'London', 'London', 'Paris', 'Tome']),
      :a1   => Daru::Vector.new(['a,b', 'b,c', 'a', nil, 'a,b,c'])
    }, order: [:id, :name, :age, :city, :a1])
  end

  it "appends 0/1 indicator vectors named after each distinct split value" do
    @ds.add_vectors_by_split(:a1, '_')
    expect(@ds.vectors.to_a).to eq([:id, :name, :age, :city, :a1, :a1_a, :a1_b, :a1_c])
    expect(@ds[:a1_a].to_a).to eq([1, 0, 1, nil, 1])
    expect(@ds[:a1_b].to_a).to eq([1, 1, 0, nil, 1])
    expect(@ds[:a1_c].to_a).to eq([0, 1, 0, nil, 1])
  end
end
|
1848
|
+
|
1849
|
+
context "#verify" do
|
1850
|
+
# Bundles a verification rule into a three-element array.
#
# The block is captured through an explicit block parameter: calling
# `Proc.new` without a block to grab the method's block was deprecated in
# Ruby 2.7 and raises ArgumentError from Ruby 3.0 onwards.
#
# @param args [Array] the rule description followed by any number of fields
# @param block [Proc] the rule's predicate
# @return [Array] `[description, fields, block]`
# @raise [ArgumentError] when no block is given (as block-less Proc.new did)
def create_test(*args, &block)
  raise ArgumentError, 'tried to create Proc object without a block' unless block

  description, *fields = args
  [description, fields, block]
end
|
1855
|
+
|
1856
|
+
# Fixture for the #verify examples: three integer vectors, one string
# vector and an :id vector of row labels, in a fixed vector order.
before do
  columns = {
    v1: Daru::Vector.new([1, 2, 3, 4]),
    v2: Daru::Vector.new([4, 3, 2, 1]),
    v3: Daru::Vector.new([10, 20, 30, 40]),
    v4: Daru::Vector.new(%w(a b a b)),
    id: Daru::Vector.new(%w(r1 r2 r3 r4))
  }
  @df = Daru::DataFrame.new(columns, order: [:v1, :v2, :v3, :v4, :id])
end
|
1866
|
+
|
1867
|
+
# Runs three rules through DataFrame#verify. Two hold for every row of the
# fixture; the third fails for the rows whose :v4 is 'a'. The expected
# report has one message per failing row, labelled either by a 1-based
# position or, when :id is passed first, by that row's :id value.
it "correctly verifies data as per the block" do
  # Rules satisfied by every row.
  odd_when_a = create_test('If v4=a, v1 odd') do |r|
    r[:v4] == 'b' || (r[:v4] == 'a' && r[:v1].odd?)
  end
  tenfold = create_test('v3=v1*10') { |r| r[:v3] == r[:v1] * 10 }
  # Rule violated by the first and third rows.
  only_b = create_test("v4='b'") { |r| r[:v4] == 'b' }

  report = @df.verify(only_b, odd_when_a, tenfold)
  expect(report).to eq(["1 [1]: v4='b'", "3 [3]: v4='b'"])

  report = @df.verify(:id, odd_when_a, tenfold, only_b)
  expect(report).to eq(["1 [r1]: v4='b'", "3 [r3]: v4='b'"])
end
|
1884
|
+
end
|
1885
|
+
|
1886
|
+
# DataFrame#compute receives an arithmetic expression as a string that
# refers to the frame's vectors by name; each result is compared against a
# Daru::Vector of element-wise expected values.
context "#compute" do
  it "performs a computation when supplied in a string" do
    firsts  = [1, 2, 3, 4]
    seconds = [4, 3, 2, 1]
    thirds  = [10, 20, 30, 40]

    # Element-wise expectations; the quotient values are integer divisions.
    quotient = Daru::Vector.new([0, 0, 1, 4])
    total    = Daru::Vector.new(firsts.zip(seconds, thirds).map { |a, b, c| a + b + c.to_f })
    product  = Daru::Vector.new(firsts.zip(seconds).map { |a, b| a * b })

    df = Daru::DataFrame.new({
      v1: Daru::Vector.new([1, 2, 3, 4]),
      v2: Daru::Vector.new([4, 3, 2, 1]),
      v3: Daru::Vector.new([10, 20, 30, 40])
    })

    expect(df.compute("v1/v2")).to eq(quotient)
    expect(df.compute("v1+v2+v3")).to eq(total)
    expect(df.compute("v1*v2")).to eq(product)
  end
end
|
1902
|
+
|
1903
|
+
# DataFrame.crosstab_by_assignation(v1, v2, v3). Per the expected frame, the
# distinct values of v1 become the rows (listed in :_id), the distinct values
# of v2 become the vectors, and each cell holds the matching v3 value.
context ".crosstab_by_assignation" do
  it "cross-tabulates values using the first vector as rows and the second as vectors" do
    v1 = Daru::Vector.new %w(a a a b b b c c c)
    v2 = Daru::Vector.new %w(a b c a b c a b c)
    v3 = Daru::Vector.new [0, 1, 0, 0, 1, 1, 0, 0, 1]
    df = Daru::DataFrame.crosstab_by_assignation(v1, v2, v3)

    expect(df[:_id].type).to eq(:object)
    expect(df[:a].type).to eq(:numeric)
    expect(df[:b].type).to eq(:numeric)

    ev_id = Daru::Vector.new %w(a b c)
    ev_a  = Daru::Vector.new [0, 0, 0]
    ev_b  = Daru::Vector.new [1, 1, 0]
    ev_c  = Daru::Vector.new [0, 1, 1]
    df2 = Daru::DataFrame.new({
      :_id => ev_id, :a => ev_a, :b => ev_b, :c => ev_c })

    expect(df2).to eq(df)
  end
end
|
1924
|
+
|
1925
|
+
# DataFrame#one_to_many. The source frame has one row per owner with
# repeated, numbered column groups (car_color1/car_value1, ...). The expected
# result has one row per non-nil group, keeping :id, recording the group
# number in :_col_id, and naming the value vectors :color and :value.
# NOTE(review): in the 'car_%v%n' pattern, %v appears to stand for the
# variable name and %n for the group number -- confirm against the
# implementation.
context "#one_to_many" do
  it "expands numbered column groups into one row per group" do
    rows = [
      ['1', 'george', 'red', 10, 'blue', 20, nil, nil],
      ['2', 'fred', 'green', 15, 'orange', 30, 'white', 20],
      ['3', 'alfred', nil, nil, nil, nil, nil, nil]
    ]
    df = Daru::DataFrame.rows(rows,
      order: [:id, :name, :car_color1, :car_value1, :car_color2,
              :car_value2, :car_color3, :car_value3])

    ids     = Daru::Vector.new %w(1 1 2 2 2)
    colors  = Daru::Vector.new %w(red blue green orange white)
    values  = Daru::Vector.new [10, 20, 15, 30, 20]
    col_ids = Daru::Vector.new [1, 2, 1, 2, 3]
    df_expected = Daru::DataFrame.new({
      :id => ids, :_col_id => col_ids, :color => colors, :value => values
    }, order: [:id, :_col_id, :color, :value])

    expect(df.one_to_many([:id], 'car_%v%n')).to eq(df_expected)
  end
end
|
1947
|
+
|
1948
|
+
# DataFrame#any? yields each vector to the block when called without an
# argument and each row when called with :row.
context "#any?" do
  before do
    source = {
      a: [1,2,3,4,5],
      b: [10,20,30,40,50],
      c: [11,22,33,44,55]
    }
    @df = Daru::DataFrame.new(source)
  end

  it "returns true if any one of the vectors satisfy condition" do
    outcome = @df.any? { |vector| vector[0] == 1 }

    expect(outcome).to eq(true)
  end

  it "returns false if none of the vectors satisfy the condition" do
    outcome = @df.any? { |vector| vector.mean > 100 }

    expect(outcome).to eq(false)
  end

  it "returns true if any one of the rows satisfy condition" do
    outcome = @df.any?(:row) { |row| row[:a] == 1 && row[:c] == 11 }

    expect(outcome).to eq(true)
  end

  it "returns false if none of the rows satisfy the condition" do
    outcome = @df.any?(:row) { |row| row.mean > 100 }

    expect(outcome).to eq(false)
  end
end
|
1972
|
+
|
1973
|
+
# DataFrame#all? yields each vector to the block when called without an
# argument and each row when called with :row.
context "#all?" do
  before do
    source = {
      a: [1,2,3,4,5],
      b: [10,20,30,40,50],
      c: [11,22,33,44,55]
    }
    @df = Daru::DataFrame.new(source)
  end

  it "returns true if all of the vectors satisfy condition" do
    outcome = @df.all? { |vector| vector.mean < 40 }

    expect(outcome).to eq(true)
  end

  it "returns false if any one of the vectors does not satisfy condition" do
    outcome = @df.all? { |vector| vector.mean == 30 }

    expect(outcome).to eq(false)
  end

  it "returns true if all of the rows satisfy condition" do
    outcome = @df.all?(:row) { |row| row.mean < 70 }

    expect(outcome).to eq(true)
  end

  it "returns false if any one of the rows does not satisfy condition" do
    outcome = @df.all?(:row) { |row| row.mean == 30 }

    expect(outcome).to eq(false)
  end
end
|
1997
|
+
|
1998
|
+
# DataFrame#only_numerics is expected to return a frame holding only the
# numeric vectors. With clone: false the returned vectors are the very same
# objects as the originals; by default they are different objects.
context "#only_numerics" do
  before do
    @v1 = Daru::Vector.new([1,2,3,4,5])
    @v2 = Daru::Vector.new(%w(one two three four five))
    @v3 = Daru::Vector.new([11,22,33,44,55])
    @df = Daru::DataFrame.new({ a: @v1, b: @v2, c: @v3 }, clone: false)
  end

  it "returns a view of only the numeric vectors" do
    dfon = @df.only_numerics(clone: false)
    numeric_frame = Daru::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(dfon).to eq(numeric_frame)
    expect(dfon[:a].object_id).to eq(@v1.object_id)
  end

  it "returns a clone of numeric vectors" do
    dfon = @df.only_numerics
    numeric_frame = Daru::DataFrame.new({ a: @v1, c: @v3 }, clone: false)

    expect(dfon).to eq(numeric_frame)
    expect(dfon[:a].object_id).to_not eq(@v1.object_id)
  end

  # Same behaviour for a frame whose vectors and index are MultiIndex
  # objects: the two string-valued vectors are expected to be dropped.
  context Daru::MultiIndex do
    before do
      # All eight [:d|:e, :one|:two, :large|:small] combinations, in order.
      agg_vectors = Daru::MultiIndex.new(
        [:d, :e].product([:one, :two], [:large, :small])
      )
      agg_index = Daru::MultiIndex.new([[:bar], [:foo]])

      data = [
        [4.112,2.234],
        %w(a b),
        [6.342,nil],
        [7.2344,3.23214],
        [8.234,4.533],
        [10.342,2.3432],
        [12.0,nil],
        %w(a b)
      ]
      @df = Daru::DataFrame.new(data, order: agg_vectors, index: agg_index)
    end

    it "returns numeric vectors" do
      vectors = Daru::MultiIndex.new([
        [:d, :one, :large],
        [:d, :two, :large],
        [:d, :two, :small],
        [:e, :one, :large],
        [:e, :one, :small],
        [:e, :two, :large]
      ])
      index = Daru::MultiIndex.new([[:bar], [:foo]])

      numeric_data = [
        [4.112,2.234],
        [6.342,nil],
        [7.2344,3.23214],
        [8.234,4.533],
        [10.342,2.3432],
        [12.0,nil]
      ]
      answer = Daru::DataFrame.new(numeric_data, order: vectors, index: index)

      expect(@df.only_numerics).to eq(answer)
    end
  end
end
|
1397
2092
|
end if mri?
|