daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -1,5 +1,3 @@
1
- require 'spec_helper.rb'
2
-
3
1
  describe Daru::DataFrame do
4
2
  before :each do
5
3
  @data_frame = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
@@ -66,6 +64,33 @@ describe Daru::DataFrame do
66
64
  expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
67
65
  expect(df[:a]) .to eq(Daru::Vector.new [1,1,1,1])
68
66
  end
67
+
68
+ it 'derives index & order from arrays' do
69
+ df = Daru::DataFrame.rows @rows
70
+ expect(df.index) .to eq(Daru::Index.new [0,1,2,3])
71
+ expect(df.vectors) .to eq(Daru::Index.new %w[0 1 2 3 4])
72
+ end
73
+
74
+ it 'derives index & order from vectors' do
75
+ rows = @rows.zip(%w[w x y z]).map { |r, n| Daru::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
76
+ df = Daru::DataFrame.rows rows
77
+ expect(df.index) .to eq(Daru::Index.new %w[w x y z])
78
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
79
+ end
80
+
81
+ it 'behaves, when rows are repeated' do
82
+ rows = @rows.zip(%w[w w y z]).map { |r, n| Daru::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
+ df = Daru::DataFrame.rows rows
84
+ expect(df.index) .to eq(Daru::Index.new %w[w_1 w_2 y z])
85
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
86
+ end
87
+
88
+ it 'behaves, when vectors are unnamed' do
89
+ rows = @rows.map { |r| Daru::Vector.new r, index: [:a,:b,:c,:d,:e] }
90
+ df = Daru::DataFrame.rows rows
91
+ expect(df.index) .to eq(Daru::Index.new [0,1,2,3])
92
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
93
+ end
69
94
  end
70
95
 
71
96
  context Daru::MultiIndex do
@@ -133,7 +158,7 @@ describe Daru::DataFrame do
133
158
  end
134
159
 
135
160
  it "initializes from a Hash of Vectors" do
136
- va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
161
+ va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
137
162
  vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
138
163
 
139
164
  df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
@@ -142,8 +167,6 @@ describe Daru::DataFrame do
142
167
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
143
168
  expect(df.a.class).to eq(Daru::Vector)
144
169
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
145
- expect(df.a.metadata).to eq({ cdc_type: 2 })
146
- expect(df.b.metadata).to eq({})
147
170
  end
148
171
 
149
172
  it "initializes from an Array of Hashes" do
@@ -422,13 +445,6 @@ describe Daru::DataFrame do
422
445
  expect(@df[:a, :b]).to eq(temp)
423
446
  end
424
447
 
425
- it "returns a DataFrame with metadata" do
426
- @df[:a].metadata = "alpha"
427
- @df[:b].metadata = "beta"
428
- subset_df = @df[:a, :b]
429
- expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
430
- end
431
-
432
448
  it "accesses vector with Integer index" do
433
449
  expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
434
450
  end
@@ -440,6 +456,11 @@ describe Daru::DataFrame do
440
456
  c: [11,22,33,44,55]
441
457
  }, index: [:one, :two, :three, :four, :five]))
442
458
  end
459
+
460
+ it 'accepts axis parameter as a last argument' do
461
+ expect(@df[:a, :vector]).to eq @df[:a]
462
+ expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
463
+ end
443
464
  end
444
465
 
445
466
  context Daru::MultiIndex do
@@ -540,22 +561,6 @@ describe Daru::DataFrame do
540
561
  expect(df_empty[:b].name).to equal(:b)
541
562
  end
542
563
 
543
- it "copies metadata when the target is a vector" do
544
- vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
545
- @df[:woo] = vec.dup
546
- expect(@df[:woo].metadata).to eq vec.metadata
547
- end
548
-
549
- it "doesn't delete metadata when the source is a dataframe with empty vectors" do
550
- empty_df = Daru::DataFrame.new({
551
- a: Daru::Vector.new([], metadata: 'alpha'),
552
- b: Daru::Vector.new([], metadata: 'beta'),
553
- })
554
-
555
- empty_df[:c] = Daru::Vector.new(1.upto(3))
556
- expect(empty_df[:a].metadata).to eq 'alpha'
557
- end
558
-
559
564
  it "appends multiple vectors at a time" do
560
565
  # TODO
561
566
  end
@@ -606,6 +611,70 @@ describe Daru::DataFrame do
606
611
  end
607
612
  end
608
613
 
614
+ context '#method_missing' do
615
+ subject(:data_frame) {
616
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
617
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
618
+ index: [:one, :two, :three, :four, :five])
619
+ }
620
+
621
+ context 'getting the vector' do
622
+ subject{
623
+ data_frame.a
624
+ }
625
+ it { is_expected.to eq [1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]) }
626
+ end
627
+
628
+ context 'setting existing vector' do
629
+ before{
630
+ data_frame.a = [100,200,300,400,500]
631
+ }
632
+ it { is_expected.to eq(Daru::DataFrame.new({
633
+ b: [11,12,13,14,15],
634
+ a: [100,200,300,400,500],
635
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
636
+ index: [:one, :two, :three, :four, :five]))
637
+ }
638
+ end
639
+
640
+ context 'setting new vector' do
641
+ before{
642
+ data_frame.d = [100,200,300,400,500]
643
+ }
644
+ it { is_expected.to eq(Daru::DataFrame.new({
645
+ b: [11,12,13,14,15],
646
+ a: [1,2,3,4,5],
647
+ d: [100,200,300,400,500],
648
+ c: [11,22,33,44,55]}, order: [:a, :b, :c, :d],
649
+ index: [:one, :two, :three, :four, :five]))
650
+ }
651
+ end
652
+
653
+ context 'no vector found' do
654
+ it 'should raise' do
655
+ expect { data_frame.e }.to raise_error(NoMethodError)
656
+ end
657
+ end
658
+ end
659
+
660
+ context '#add_vector' do
661
+ subject(:data_frame) {
662
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
663
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
664
+ index: [:one, :two, :three, :four, :five])
665
+ }
666
+ before {
667
+ data_frame.add_vector :a, [100,200,300,400,500]
668
+ }
669
+
670
+ it { is_expected.to eq(Daru::DataFrame.new({
671
+ b: [11,12,13,14,15],
672
+ a: [100,200,300,400,500],
673
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
674
+ index: [:one, :two, :three, :four, :five]))
675
+ }
676
+ end
677
+
609
678
  context "#row[]=" do
610
679
  context Daru::Index do
611
680
  before :each do
@@ -685,10 +754,578 @@ describe Daru::DataFrame do
685
754
 
686
755
  context Daru::MultiIndex do
687
756
  pending
757
+ # TO DO
758
+ end
759
+
760
+ context Daru::CategoricalIndex do
761
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
762
+ let(:df) do
763
+ Daru::DataFrame.new({
764
+ a: 'a'..'e',
765
+ b: 1..5
766
+ }, index: idx)
767
+ end
768
+
769
+ context "modify exiting row" do
770
+ context "single category" do
771
+ subject { df }
772
+ before { df.row[:a] = ['x', 'y'] }
773
+
774
+ it { is_expected.to be_a Daru::DataFrame }
775
+ its(:index) { is_expected.to eq idx }
776
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
777
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
778
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
779
+ end
780
+
781
+ context "multiple categories" do
782
+ subject { df }
783
+ before { df.row[:a, 1] = ['x', 'y'] }
784
+
785
+ it { is_expected.to be_a Daru::DataFrame }
786
+ its(:index) { is_expected.to eq idx }
787
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
788
+ its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
789
+ its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
790
+ end
791
+
792
+ context "positional index" do
793
+ subject { df }
794
+ before { df.row[0, 2] = ['x', 'y'] }
795
+
796
+ it { is_expected.to be_a Daru::DataFrame }
797
+ its(:index) { is_expected.to eq idx }
798
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
799
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
800
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
801
+ end
802
+ end
803
+
804
+ context "add new row" do
805
+ # TODO
806
+ end
807
+ end
808
+ end
809
+
810
+ context "#row.at" do
811
+ context Daru::Index do
812
+ let(:idx) { Daru::Index.new [1, 0, :c] }
813
+ let(:df) do
814
+ Daru::DataFrame.new({
815
+ a: 1..3,
816
+ b: 'a'..'c'
817
+ }, index: idx)
818
+ end
819
+
820
+ context "single position" do
821
+ subject { df.row.at 1 }
822
+
823
+ it { is_expected.to be_a Daru::Vector }
824
+ its(:size) { is_expected.to eq 2 }
825
+ its(:to_a) { is_expected.to eq [2, 'b'] }
826
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
827
+ end
828
+
829
+ context "multiple positions" do
830
+ subject { df.row.at 0, 2 }
831
+
832
+ it { is_expected.to be_a Daru::DataFrame }
833
+ its(:size) { is_expected.to eq 2 }
834
+ its(:'index.to_a') { is_expected.to eq [1, :c] }
835
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
836
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
837
+ end
838
+
839
+ context "invalid position" do
840
+ it { expect { df.row.at 3 }.to raise_error IndexError }
841
+ end
842
+
843
+ context "invalid positions" do
844
+ it { expect { df.row.at 2, 3 }.to raise_error IndexError }
845
+ end
846
+
847
+ context "range" do
848
+ subject { df.row.at 0..1 }
849
+
850
+ it { is_expected.to be_a Daru::DataFrame }
851
+ its(:size) { is_expected.to eq 2 }
852
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
853
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
854
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
855
+ end
856
+
857
+ context "range with negative end" do
858
+ subject { df.row.at 0..-2 }
859
+
860
+ it { is_expected.to be_a Daru::DataFrame }
861
+ its(:size) { is_expected.to eq 2 }
862
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
863
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
864
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
865
+ end
866
+
867
+ context "range with single element" do
868
+ subject { df.row.at 0..0 }
869
+
870
+ it { is_expected.to be_a Daru::DataFrame }
871
+ its(:size) { is_expected.to eq 1 }
872
+ its(:'index.to_a') { is_expected.to eq [1] }
873
+ its(:'a.to_a') { is_expected.to eq [1] }
874
+ its(:'b.to_a') { is_expected.to eq ['a'] }
875
+ end
876
+ end
877
+
878
+ context Daru::MultiIndex do
879
+ let (:idx) do
880
+ Daru::MultiIndex.from_tuples [
881
+ [:a,:one,:bar],
882
+ [:a,:one,:baz],
883
+ [:b,:two,:bar],
884
+ [:a,:two,:baz],
885
+ ]
886
+ end
887
+ let (:df) do
888
+ Daru::DataFrame.new({
889
+ a: 1..4,
890
+ b: 'a'..'d'
891
+ }, index: idx )
892
+ end
893
+
894
+ context "single position" do
895
+ subject { df.row.at 1 }
896
+
897
+ it { is_expected.to be_a Daru::Vector }
898
+ its(:size) { is_expected.to eq 2 }
899
+ its(:to_a) { is_expected.to eq [2, 'b'] }
900
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
901
+ end
902
+
903
+ context "multiple positions" do
904
+ subject { df.row.at 0, 2 }
905
+
906
+ it { is_expected.to be_a Daru::DataFrame }
907
+ its(:size) { is_expected.to eq 2 }
908
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
909
+ [:b, :two, :bar]] }
910
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
911
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
912
+ [:b, :two, :bar]] }
913
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
914
+ end
915
+
916
+ context "invalid position" do
917
+ it { expect { df.row.at 4 }.to raise_error IndexError }
918
+ end
919
+
920
+ context "invalid positions" do
921
+ it { expect { df.row.at 3, 4 }.to raise_error IndexError }
922
+ end
923
+
924
+ context "range" do
925
+ subject { df.row.at 0..1 }
926
+
927
+ it { is_expected.to be_a Daru::DataFrame }
928
+ its(:size) { is_expected.to eq 2 }
929
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
930
+ [:a, :one, :baz]] }
931
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
932
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
933
+ [:a, :one, :baz]] }
934
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
935
+ end
936
+
937
+ context "range with negative end" do
938
+ subject { df.row.at 0..-3 }
939
+
940
+ it { is_expected.to be_a Daru::DataFrame }
941
+ its(:size) { is_expected.to eq 2 }
942
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
943
+ [:a, :one, :baz]] }
944
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
945
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
946
+ [:a, :one, :baz]] }
947
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
948
+ end
949
+
950
+ context " range with single element" do
951
+ subject { df.row.at 0..0 }
952
+
953
+ it { is_expected.to be_a Daru::DataFrame }
954
+ its(:size) { is_expected.to eq 1 }
955
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
956
+ its(:'a.to_a') { is_expected.to eq [1] }
957
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
958
+ its(:'b.to_a') { is_expected.to eq ['a'] }
959
+ end
960
+ end
961
+
962
+ context Daru::CategoricalIndex do
963
+ let (:idx) { Daru::CategoricalIndex.new [:a, 1, 1, :a, :c] }
964
+ let (:df) do
965
+ Daru::DataFrame.new({
966
+ a: 1..5,
967
+ b: 'a'..'e'
968
+ }, index: idx )
969
+ end
970
+
971
+ context "single positional index" do
972
+ subject { df.row.at 1 }
973
+
974
+ it { is_expected.to be_a Daru::Vector }
975
+ its(:size) { is_expected.to eq 2 }
976
+ its(:to_a) { is_expected.to eq [2, 'b'] }
977
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
978
+ end
979
+
980
+ context "multiple positional indexes" do
981
+ subject { df.row.at 0, 2 }
982
+
983
+ it { is_expected.to be_a Daru::DataFrame }
984
+ its(:size) { is_expected.to eq 2 }
985
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
986
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
987
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
988
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
989
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
990
+ end
991
+
992
+ context "invalid position" do
993
+ it { expect { df.at 5 }.to raise_error IndexError }
994
+ end
995
+
996
+ context "invalid positions" do
997
+ it { expect { df.at 4, 5 }.to raise_error IndexError }
998
+ end
999
+
1000
+ context "range" do
1001
+ subject { df.row.at 0..1 }
1002
+
1003
+ it { is_expected.to be_a Daru::DataFrame }
1004
+ its(:size) { is_expected.to eq 2 }
1005
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1006
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1007
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1008
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1009
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1010
+ end
1011
+
1012
+ context "range with negative end" do
1013
+ subject { df.row.at 0..-4 }
1014
+
1015
+ it { is_expected.to be_a Daru::DataFrame }
1016
+ its(:size) { is_expected.to eq 2 }
1017
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1018
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1019
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1020
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1021
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1022
+ end
1023
+
1024
+ context " range with single element" do
1025
+ subject { df.row.at 0..0 }
1026
+
1027
+ it { is_expected.to be_a Daru::DataFrame }
1028
+ its(:size) { is_expected.to eq 1 }
1029
+ its(:'index.to_a') { is_expected.to eq [:a] }
1030
+ its(:'a.to_a') { is_expected.to eq [1] }
1031
+ its(:'a.index.to_a') { is_expected.to eq [:a] }
1032
+ its(:'b.to_a') { is_expected.to eq ['a'] }
1033
+ its(:'b.index.to_a') { is_expected.to eq [:a] }
1034
+ end
1035
+ end
1036
+ end
1037
+
1038
+ context "#row.set_at" do
1039
+ let(:df) do
1040
+ Daru::DataFrame.new({
1041
+ a: 1..3,
1042
+ b: 'a'..'c'
1043
+ })
1044
+ end
1045
+
1046
+ context "single position" do
1047
+ subject { df }
1048
+ before { df.row.set_at [1], ['x', 'y'] }
1049
+
1050
+ its(:size) { is_expected.to eq 3 }
1051
+ its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1052
+ its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1053
+ end
1054
+
1055
+ context "multiple position" do
1056
+ subject { df }
1057
+ before { df.row.set_at [0, 2], ['x', 'y'] }
1058
+
1059
+ its(:size) { is_expected.to eq 3 }
1060
+ its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1061
+ its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1062
+ end
1063
+
1064
+ context "invalid position" do
1065
+ it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1066
+ end
1067
+
1068
+ context "invalid positions" do
1069
+ it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1070
+ end
1071
+
1072
+ context "incorrect size" do
1073
+ it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1074
+ end
1075
+ end
1076
+
1077
+ context "#at" do
1078
+ context Daru::Index do
1079
+ let(:idx) { Daru::Index.new [:a, :b, :c] }
1080
+ let(:df) do
1081
+ Daru::DataFrame.new({
1082
+ 1 => 1..3,
1083
+ a: 'a'..'c',
1084
+ b: 11..13
1085
+ }, index: idx)
1086
+ end
1087
+
1088
+ context "single position" do
1089
+ subject { df.at 1 }
1090
+
1091
+ it { is_expected.to be_a Daru::Vector }
1092
+ its(:size) { is_expected.to eq 3 }
1093
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1094
+ its(:index) { is_expected.to eq idx }
1095
+ end
1096
+
1097
+ context "multiple positions" do
1098
+ subject { df.at 0, 2 }
1099
+
1100
+ it { is_expected.to be_a Daru::DataFrame }
1101
+ its(:shape) { is_expected.to eq [3, 2] }
1102
+ its(:index) { is_expected.to eq idx }
1103
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1104
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1105
+ end
1106
+
1107
+ context "single invalid position" do
1108
+ it { expect { df. at 3 }.to raise_error IndexError }
1109
+ end
1110
+
1111
+ context "multiple invalid positions" do
1112
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1113
+ end
1114
+
1115
+ context "range" do
1116
+ subject { df.at 0..1 }
1117
+
1118
+ it { is_expected.to be_a Daru::DataFrame }
1119
+ its(:shape) { is_expected.to eq [3, 2] }
1120
+ its(:index) { is_expected.to eq idx }
1121
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1122
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1123
+ end
1124
+
1125
+ context "range with negative end" do
1126
+ subject { df.at 0..-2 }
1127
+
1128
+ it { is_expected.to be_a Daru::DataFrame }
1129
+ its(:shape) { is_expected.to eq [3, 2] }
1130
+ its(:index) { is_expected.to eq idx }
1131
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1132
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1133
+ end
1134
+
1135
+ context "range with single element" do
1136
+ subject { df.at 1..1 }
1137
+
1138
+ it { is_expected.to be_a Daru::DataFrame }
1139
+ its(:shape) { is_expected.to eq [3, 1] }
1140
+ its(:index) { is_expected.to eq idx }
1141
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1142
+ end
1143
+ end
1144
+
1145
+ context Daru::MultiIndex do
1146
+ let (:idx) do
1147
+ Daru::MultiIndex.from_tuples [
1148
+ [:a,:one,:bar],
1149
+ [:a,:one,:baz],
1150
+ [:b,:two,:bar],
1151
+ ]
1152
+ end
1153
+ let(:df) do
1154
+ Daru::DataFrame.new({
1155
+ 1 => 1..3,
1156
+ a: 'a'..'c',
1157
+ b: 11..13
1158
+ }, index: idx)
1159
+ end
1160
+
1161
+ context "single position" do
1162
+ subject { df.at 1 }
1163
+
1164
+ it { is_expected.to be_a Daru::Vector }
1165
+ its(:size) { is_expected.to eq 3 }
1166
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1167
+ its(:index) { is_expected.to eq idx }
1168
+ end
1169
+
1170
+ context "multiple positions" do
1171
+ subject { df.at 0, 2 }
1172
+
1173
+ it { is_expected.to be_a Daru::DataFrame }
1174
+ its(:shape) { is_expected.to eq [3, 2] }
1175
+ its(:index) { is_expected.to eq idx }
1176
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1177
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1178
+ end
1179
+
1180
+ context "single invalid position" do
1181
+ it { expect { df. at 3 }.to raise_error IndexError }
1182
+ end
1183
+
1184
+ context "multiple invalid positions" do
1185
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1186
+ end
1187
+
1188
+ context "range" do
1189
+ subject { df.at 0..1 }
1190
+
1191
+ it { is_expected.to be_a Daru::DataFrame }
1192
+ its(:shape) { is_expected.to eq [3, 2] }
1193
+ its(:index) { is_expected.to eq idx }
1194
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1195
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1196
+ end
1197
+
1198
+ context "range with negative end" do
1199
+ subject { df.at 0..-2 }
1200
+
1201
+ it { is_expected.to be_a Daru::DataFrame }
1202
+ its(:shape) { is_expected.to eq [3, 2] }
1203
+ its(:index) { is_expected.to eq idx }
1204
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1205
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1206
+ end
1207
+
1208
+ context "range with single element" do
1209
+ subject { df.at 1..1 }
1210
+
1211
+ it { is_expected.to be_a Daru::DataFrame }
1212
+ its(:shape) { is_expected.to eq [3, 1] }
1213
+ its(:index) { is_expected.to eq idx }
1214
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1215
+ end
1216
+ end
1217
+
1218
+ context Daru::CategoricalIndex do
1219
+ let (:idx) { Daru::CategoricalIndex.new [:a, 1, 1] }
1220
+ let(:df) do
1221
+ Daru::DataFrame.new({
1222
+ 1 => 1..3,
1223
+ a: 'a'..'c',
1224
+ b: 11..13
1225
+ }, index: idx)
1226
+ end
1227
+
1228
+ context "single position" do
1229
+ subject { df.at 1 }
1230
+
1231
+ it { is_expected.to be_a Daru::Vector }
1232
+ its(:size) { is_expected.to eq 3 }
1233
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1234
+ its(:index) { is_expected.to eq idx }
1235
+ end
1236
+
1237
+ context "multiple positions" do
1238
+ subject { df.at 0, 2 }
1239
+
1240
+ it { is_expected.to be_a Daru::DataFrame }
1241
+ its(:shape) { is_expected.to eq [3, 2] }
1242
+ its(:index) { is_expected.to eq idx }
1243
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1244
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1245
+ end
1246
+
1247
+ context "single invalid position" do
1248
+ it { expect { df. at 3 }.to raise_error IndexError }
1249
+ end
1250
+
1251
+ context "multiple invalid positions" do
1252
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1253
+ end
1254
+
1255
+ context "range" do
1256
+ subject { df.at 0..1 }
1257
+
1258
+ it { is_expected.to be_a Daru::DataFrame }
1259
+ its(:shape) { is_expected.to eq [3, 2] }
1260
+ its(:index) { is_expected.to eq idx }
1261
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1262
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1263
+ end
1264
+
1265
+ context "range with negative index" do
1266
+ subject { df.at 0..-2 }
1267
+
1268
+ it { is_expected.to be_a Daru::DataFrame }
1269
+ its(:shape) { is_expected.to eq [3, 2] }
1270
+ its(:index) { is_expected.to eq idx }
1271
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1272
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1273
+ end
1274
+
1275
+ context "range with single element" do
1276
+ subject { df.at 1..1 }
1277
+
1278
+ it { is_expected.to be_a Daru::DataFrame }
1279
+ its(:shape) { is_expected.to eq [3, 1] }
1280
+ its(:index) { is_expected.to eq idx }
1281
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1282
+ end
1283
+ end
1284
+ end
1285
+
1286
+ context "#set_at" do
1287
+ let(:df) do
1288
+ Daru::DataFrame.new({
1289
+ 1 => 1..3,
1290
+ a: 'a'..'c',
1291
+ b: 11..13
1292
+ })
1293
+ end
1294
+
1295
+ context "single position" do
1296
+ subject { df }
1297
+ before { df.set_at [1], ['x', 'y', 'z'] }
1298
+
1299
+ its(:shape) { is_expected.to eq [3, 3] }
1300
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1301
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1302
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1303
+ end
1304
+
1305
+ context "multiple position" do
1306
+ subject { df }
1307
+ before { df.set_at [1, 2], ['x', 'y', 'z'] }
1308
+
1309
+ its(:shape) { is_expected.to eq [3, 3] }
1310
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1311
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1312
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1313
+ end
1314
+
1315
+ context "invalid position" do
1316
+ it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1317
+ end
1318
+
1319
+ context "invalid positions" do
1320
+ it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1321
+ end
1322
+
1323
+ context "incorrect size" do
1324
+ it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
688
1325
  end
689
1326
  end
690
1327
 
691
- context "#row" do
1328
+ context "#row[]" do
692
1329
  context Daru::Index do
693
1330
  before :each do
694
1331
  @df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
@@ -741,7 +1378,7 @@ describe Daru::DataFrame do
741
1378
  expect(@df_mi.row[0]).to eq(Daru::Vector.new([11,1,11,1], index: @order_mi))
742
1379
  end
743
1380
 
744
- it "returns a DataFrame when specifying numeric range" do
1381
+ it "returns a DataFrame whecn specifying numeric range" do
745
1382
  sub_index = Daru::MultiIndex.from_tuples([
746
1383
  [:a,:one,:bar],
747
1384
  [:a,:one,:baz]
@@ -787,9 +1424,97 @@ describe Daru::DataFrame do
787
1424
  ], index: sub_index, order: @order_mi))
788
1425
  end
789
1426
  end
1427
+
1428
+ context Daru::CategoricalIndex do
1429
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
1430
+ let(:df) do
1431
+ Daru::DataFrame.new({
1432
+ a: 'a'..'e',
1433
+ b: 1..5
1434
+ }, index: idx)
1435
+ end
1436
+
1437
+ context "single category" do
1438
+ context "multiple instances" do
1439
+ subject { df.row[:a] }
1440
+
1441
+ it { is_expected.to be_a Daru::DataFrame }
1442
+ its(:index) { is_expected.to eq Daru::CategoricalIndex.new [:a, :a] }
1443
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
1444
+ its(:a) { Daru::Vector.new ['a', 'c'] }
1445
+ its(:b) { Daru::Vector.new [1, 3] }
1446
+ end
1447
+
1448
+ context "single instance" do
1449
+ subject { df.row[:c] }
1450
+
1451
+ it { is_expected.to be_a Daru::Vector }
1452
+ its(:index) { is_expected.to eq Daru::Index.new [:a, :b] }
1453
+ its(:to_a) { is_expected.to eq ['e', 5] }
1454
+ end
1455
+ end
1456
+
1457
+ context "multiple categories" do
1458
+ subject { df.row[:a, 1] }
1459
+
1460
+ it { is_expected.to be_a Daru::DataFrame }
1461
+ its(:index) { is_expected.to eq Daru::CategoricalIndex.new(
1462
+ [:a, 1, :a, 1 ]) }
1463
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
1464
+ its(:a) { Daru::Vector.new ['a', 'c', 'b', 'd'] }
1465
+ its(:b) { Daru::Vector.new [1, 3, 2, 4] }
1466
+ end
1467
+
1468
+ context "positional index" do
1469
+ subject { df.row[0] }
1470
+
1471
+ it { is_expected.to be_a Daru::Vector }
1472
+ its(:index) { is_expected.to eq Daru::Index.new [:a, :b] }
1473
+ its(:to_a) { is_expected.to eq ['a', 1] }
1474
+ end
1475
+
1476
+ context "invalid positional index" do
1477
+ it { expect { df.row[5] }.to raise_error IndexError }
1478
+ end
1479
+
1480
+ context "invalid category" do
1481
+ it { expect { df.row[:d] }.to raise_error IndexError }
1482
+ end
1483
+ end
790
1484
  end
791
1485
 
792
1486
  context "#add_row" do
1487
+ subject(:data_frame) {
1488
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1489
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
1490
+ index: [:one, :two, :three, :four, :five])
1491
+ }
1492
+ context 'named' do
1493
+ before {
1494
+ data_frame.add_row [100,200,300], :six
1495
+ }
1496
+
1497
+ it { is_expected.to eq(Daru::DataFrame.new({
1498
+ a: [1,2,3,4,5,100],
1499
+ b: [11,12,13,14,15,200],
1500
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1501
+ index: [:one, :two, :three, :four, :five, :six]))
1502
+ }
1503
+ end
1504
+
1505
+ context 'unnamed' do
1506
+ before {
1507
+ data_frame.add_row [100,200,300]
1508
+ }
1509
+
1510
+ it { is_expected.to eq(Daru::DataFrame.new({
1511
+ a: [1,2,3,4,5,100],
1512
+ b: [11,12,13,14,15,200],
1513
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1514
+ index: [:one, :two, :three, :four, :five, 5]))
1515
+ }
1516
+ end
1517
+
793
1518
  it "allows adding rows after making empty DF by specfying only order" do
794
1519
  df = Daru::DataFrame.new({}, order: [:a, :b, :c])
795
1520
  df.add_row [1,2,3]
@@ -802,6 +1527,40 @@ describe Daru::DataFrame do
802
1527
  end
803
1528
  end
804
1529
 
1530
+ context "#first" do
1531
+ it 'works' do
1532
+ expect(@data_frame.first(2)).to eq(
1533
+ Daru::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1534
+ order: [:a, :b, :c],
1535
+ index: [:one, :two]))
1536
+ end
1537
+
1538
+ it 'works with too large values' do
1539
+ expect(@data_frame.first(200)).to eq(@data_frame)
1540
+ end
1541
+
1542
+ it 'has synonym' do
1543
+ expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1544
+ end
1545
+ end
1546
+
1547
+ context "#last" do
1548
+ it 'works' do
1549
+ expect(@data_frame.last(2)).to eq(
1550
+ Daru::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1551
+ order: [:a, :b, :c],
1552
+ index: [:four, :five]))
1553
+ end
1554
+
1555
+ it 'works with too large values' do
1556
+ expect(@data_frame.last(200)).to eq(@data_frame)
1557
+ end
1558
+
1559
+ it 'has synonym' do
1560
+ expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1561
+ end
1562
+ end
1563
+
805
1564
  context "#==" do
806
1565
  it "compares by vectors, index and values of a DataFrame (ignores name)" do
807
1566
  a = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
@@ -814,6 +1573,13 @@ describe Daru::DataFrame do
814
1573
  end
815
1574
  end
816
1575
 
1576
+ context '#rename' do
1577
+ subject { @data_frame.rename 'other' }
1578
+
1579
+ it { is_expected.to be_a Daru::DataFrame }
1580
+ its(:name) { is_expected.to eq 'other' }
1581
+ end
1582
+
817
1583
  context "#dup" do
818
1584
  context Daru::Index do
819
1585
  it "dups every data structure inside DataFrame" do
@@ -841,31 +1607,122 @@ describe Daru::DataFrame do
841
1607
  end
842
1608
  end
843
1609
 
844
- context "#dup_only_valid" do
845
- before do
846
- @missing_data_df = Daru::DataFrame.new({
847
- a: [1 , 2, 3, nil, 4, nil, 5],
848
- b: [nil, 2, 3, nil, 4, nil, 5],
849
- c: [1, 2, 3, 43 , 4, nil, 5]
1610
+ context '#reject_values' do
1611
+ let(:df) do
1612
+ Daru::DataFrame.new({
1613
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1614
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1615
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1616
+ }, index: 11..18)
1617
+ end
1618
+ before { df.to_category :b }
1619
+
1620
+ context 'remove nils only' do
1621
+ subject { df.reject_values nil }
1622
+ it { is_expected.to be_a Daru::DataFrame }
1623
+ its(:'b.type') { is_expected.to eq :category }
1624
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1625
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1626
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1627
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1628
+ end
1629
+
1630
+ context 'remove Float::NAN only' do
1631
+ subject { df.reject_values Float::NAN }
1632
+ it { is_expected.to be_a Daru::DataFrame }
1633
+ its(:'b.type') { is_expected.to eq :category }
1634
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1635
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1636
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1637
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1638
+ end
1639
+
1640
+ context 'remove both nil and Float::NAN' do
1641
+ subject { df.reject_values nil, Float::NAN }
1642
+ it { is_expected.to be_a Daru::DataFrame }
1643
+ its(:'b.type') { is_expected.to eq :category }
1644
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
1645
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
1646
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
1647
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
1648
+ end
1649
+
1650
+ context 'any other values' do
1651
+ subject { df.reject_values 1, 5 }
1652
+ it { is_expected.to be_a Daru::DataFrame }
1653
+ its(:'b.type') { is_expected.to eq :category }
1654
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1655
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1656
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1657
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
1658
+ end
1659
+
1660
+ context 'when resultant dataframe has one row' do
1661
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1662
+ it { is_expected.to be_a Daru::DataFrame }
1663
+ its(:'b.type') { is_expected.to eq :category }
1664
+ its(:'a.to_a') { is_expected.to eq [7] }
1665
+ its(:'b.to_a') { is_expected.to eq [8] }
1666
+ its(:'c.to_a') { is_expected.to eq [7] }
1667
+ its(:'index.to_a') { is_expected.to eq [18] }
1668
+ end
1669
+
1670
+ context 'when resultant dataframe is empty' do
1671
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1672
+ it { is_expected.to be_a Daru::DataFrame }
1673
+ its(:'b.type') { is_expected.to eq :category }
1674
+ its(:'a.to_a') { is_expected.to eq [] }
1675
+ its(:'b.to_a') { is_expected.to eq [] }
1676
+ its(:'c.to_a') { is_expected.to eq [] }
1677
+ its(:'index.to_a') { is_expected.to eq [] }
1678
+ end
1679
+ end
1680
+
1681
+ context '#replace_values' do
1682
+ subject do
1683
+ Daru::DataFrame.new({
1684
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1685
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1686
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
850
1687
  })
851
1688
  end
852
-
853
- it "dups rows with non-missing data only" do
854
- df = Daru::DataFrame.new({
855
- a: [2, 3, 4, 5],
856
- b: [2, 3, 4, 5],
857
- c: [2, 3, 4, 5]
858
- }, index: [1,2,4,6])
859
- expect(@missing_data_df.dup_only_valid).to eq(df)
860
- end
861
-
862
- it "dups only the specified vectors" do
863
- df = Daru::DataFrame.new({
864
- a: [2,3,4,5],
865
- c: [2,3,4,5]
866
- }, index: [1,2,4,6])
867
- expect(@missing_data_df.dup_only_valid([:a, :c])).to eq(df)
868
- end
1689
+ before { subject.to_category :b }
1690
+
1691
+ context 'replace nils only' do
1692
+ before { subject.replace_values nil, 10 }
1693
+ it { is_expected.to be_a Daru::DataFrame }
1694
+ its(:'b.type') { is_expected.to eq :category }
1695
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1696
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1697
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
1698
+ end
1699
+
1700
+ context 'replace Float::NAN only' do
1701
+ before { subject.replace_values Float::NAN, 10 }
1702
+ it { is_expected.to be_a Daru::DataFrame }
1703
+ its(:'b.type') { is_expected.to eq :category }
1704
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1705
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1706
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1707
+ end
1708
+
1709
+ context 'replace both nil and Float::NAN' do
1710
+ before { subject.replace_values [nil, Float::NAN], 10 }
1711
+ it { is_expected.to be_a Daru::DataFrame }
1712
+ its(:'b.type') { is_expected.to eq :category }
1713
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1714
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1715
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1716
+ end
1717
+
1718
+ context 'replace other values' do
1719
+ before { subject.replace_values [1, 5], 10 }
1720
+ it { is_expected.to be_a Daru::DataFrame }
1721
+ its(:'b.type') { is_expected.to eq :category }
1722
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1723
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1724
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1725
+ end
869
1726
  end
870
1727
 
871
1728
  context "#clone" do
@@ -902,6 +1759,27 @@ describe Daru::DataFrame do
902
1759
 
903
1760
  end
904
1761
 
1762
+ context "#clone_only_valid" do
1763
+ let(:df_with_missing) {
1764
+ Daru::DataFrame.new({
1765
+ a: [1 , 2, 3, nil, 4, nil, 5],
1766
+ b: [nil, 2, 3, nil, 4, nil, 5],
1767
+ c: [1, 2, 3, 43 , 4, nil, 5]
1768
+ })
1769
+ }
1770
+
1771
+ let(:df_without_missing) {
1772
+ Daru::DataFrame.new({
1773
+ a: [2,3,4,5],
1774
+ c: [2,3,4,5]
1775
+ })
1776
+ }
1777
+ it 'does the most reasonable thing' do
1778
+ expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*Daru::MISSING_VALUES))
1779
+ expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1780
+ end
1781
+ end
1782
+
905
1783
  context "#clone_structure" do
906
1784
  it "clones only the index and vector structures of the data frame" do
907
1785
  cs = @data_frame.clone_structure
@@ -912,6 +1790,19 @@ describe Daru::DataFrame do
912
1790
  end
913
1791
  end
914
1792
 
1793
+ context "#each_index" do
1794
+ it "iterates over index" do
1795
+ idxs = []
1796
+ ret = @data_frame.each_index do |index|
1797
+ idxs << index
1798
+ end
1799
+
1800
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
1801
+
1802
+ expect(ret).to eq(@data_frame)
1803
+ end
1804
+ end
1805
+
915
1806
  context "#each_vector_with_index" do
916
1807
  it "iterates over vectors with index" do
917
1808
  idxs = []
@@ -964,6 +1855,10 @@ describe Daru::DataFrame do
964
1855
  ret = @data_frame.each
965
1856
  expect(ret.is_a?(Enumerator)).to eq(true)
966
1857
  end
1858
+
1859
+ it "raises on unknown axis" do
1860
+ expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
1861
+ end
967
1862
  end
968
1863
 
969
1864
  context "#recode" do
@@ -1108,8 +2003,23 @@ describe Daru::DataFrame do
1108
2003
  end
1109
2004
  end
1110
2005
 
2006
+ # FIXME: collect_VECTORS_with_index, but map_VECTOR_with_index -- ??? -- zverok
2007
+ # (Not to mention the unfortunate difference between them...)
2008
+ context "#collect_vector_with_index" do
2009
+ it "iterates over vectors with index and returns an Array" do
2010
+ idx = []
2011
+ ret = @data_frame.collect_vector_with_index do |vector, index|
2012
+ idx << index
2013
+ vector.sum
2014
+ end
2015
+
2016
+ expect(ret).to eq(Daru::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2017
+ expect(idx).to eq([:a, :b, :c])
2018
+ end
2019
+ end
2020
+
1111
2021
  context "#map_rows_with_index" do
1112
- it "iterates over rows with index and returns a modified DataFrame" do
2022
+ it "iterates over rows with index and returns an Array" do
1113
2023
  idx = []
1114
2024
  ret = @data_frame.map_rows_with_index do |row, index|
1115
2025
  idx << index
@@ -1122,6 +2032,21 @@ describe Daru::DataFrame do
1122
2032
  end
1123
2033
  end
1124
2034
 
2035
+ context '#collect_row_with_index' do
2036
+ it "iterates over rows with index and returns a Vector" do
2037
+ idx = []
2038
+ ret = @data_frame.collect_row_with_index do |row, index|
2039
+ idx << index
2040
+ expect(row.class).to eq(Daru::Vector)
2041
+ row[:a] * row[:c]
2042
+ end
2043
+
2044
+ expected = Daru::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2045
+ expect(ret).to eq(expected)
2046
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2047
+ end
2048
+ end
2049
+
1125
2050
  context "#delete_vector" do
1126
2051
  context Daru::Index do
1127
2052
  it "deletes the specified vector" do
@@ -1208,6 +2133,13 @@ describe Daru::DataFrame do
1208
2133
 
1209
2134
  expect(a).to eq(Daru::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
1210
2135
  end
2136
+
2137
+ it "preserves names of vectors" do
2138
+ df = Daru::DataFrame.new a: 1..3, b: 4..6
2139
+ df1 = df.filter_rows { |r| r[:a] != 2 }
2140
+
2141
+ expect(df1[:a].name).to eq(df[:a].name)
2142
+ end
1211
2143
  end
1212
2144
  end
1213
2145
 
@@ -1225,6 +2157,19 @@ describe Daru::DataFrame do
1225
2157
  end
1226
2158
  end
1227
2159
 
2160
+ context "#filter" do
2161
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2162
+ it "dispatches" do
2163
+ expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2164
+ eq df.filter_rows{|r| r[:a] % 2 == 0 }
2165
+
2166
+ expect(df.filter(:vector){|v| v[0] == 1}).to \
2167
+ eq df.filter_vectors{|v| v[0] == 1}
2168
+
2169
+ expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2170
+ end
2171
+ end
2172
+
1228
2173
  context "#to_a" do
1229
2174
  context Daru::Index do
1230
2175
  it "converts DataFrame into array of hashes" do
@@ -1280,7 +2225,6 @@ describe Daru::DataFrame do
1280
2225
  context Daru::Index do
1281
2226
  before :each do
1282
2227
  @df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
1283
- @df[:a].metadata = { cdc_type: 2 }
1284
2228
  end
1285
2229
 
1286
2230
  it "sorts according to given vector order (bang)" do
@@ -1302,17 +2246,115 @@ describe Daru::DataFrame do
1302
2246
  )
1303
2247
  expect(ans).to_not eq(@df)
1304
2248
  end
1305
-
1306
- it "retains the vector metadata from the original dataframe" do
1307
- ans = @df.sort([:a])
1308
- expect(ans[:a].metadata).to eq({ cdc_type: 2 })
1309
- end
1310
-
1311
2249
  end
1312
2250
 
1313
2251
  context Daru::MultiIndex do
1314
2252
  pending
1315
2253
  end
2254
+
2255
+ context Daru::CategoricalIndex do
2256
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2257
+ let(:df) do
2258
+ Daru::DataFrame.new({
2259
+ a: [2, -1, 3, 4, 5],
2260
+ b: ['x', 'y', 'x', 'a', 'y'],
2261
+ c: [nil, nil, -2, 2, 1]
2262
+ }, index: idx)
2263
+ end
2264
+
2265
+ context "ascending order" do
2266
+ context "single vector" do
2267
+ subject { df.sort [:a] }
2268
+
2269
+ its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2270
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2271
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2272
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2273
+ end
2274
+
2275
+ context "multiple vectors" do
2276
+ subject { df.sort [:c, :b] }
2277
+
2278
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2279
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2280
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2281
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2282
+ end
2283
+
2284
+ context "block" do
2285
+ context "automatic handle nils" do
2286
+ subject do
2287
+ df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2288
+ end
2289
+
2290
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2291
+ its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2292
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2293
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2294
+ end
2295
+
2296
+ context "manually handle nils" do
2297
+ subject do
2298
+ df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2299
+ end
2300
+
2301
+ its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2302
+ its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2303
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2304
+ its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2305
+ end
2306
+ end
2307
+ end
2308
+
2309
+ context "descending order" do
2310
+ context "single vector" do
2311
+ subject { df.sort [:a], ascending: false }
2312
+
2313
+ its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2314
+ its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2315
+ its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2316
+ its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2317
+ end
2318
+
2319
+ context "multiple vectors" do
2320
+ subject { df.sort [:c, :b], ascending: false }
2321
+
2322
+ its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2323
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2324
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2325
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2326
+ end
2327
+
2328
+ context "block" do
2329
+ context "automatic handle nils" do
2330
+ subject do
2331
+ df.sort [:c],
2332
+ by: {c: lambda { |a| a.abs } },
2333
+ handle_nils: true,
2334
+ ascending: false
2335
+ end
2336
+
2337
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2338
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2339
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2340
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2341
+ end
2342
+
2343
+ context "manually handle nils" do
2344
+ subject do
2345
+ df.sort [:c],
2346
+ by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2347
+ ascending: false
2348
+ end
2349
+
2350
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2351
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2352
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2353
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2354
+ end
2355
+ end
2356
+ end
2357
+ end
1316
2358
  end
1317
2359
 
1318
2360
  context "#sort!" do
@@ -1451,6 +2493,11 @@ describe Daru::DataFrame do
1451
2493
  @df.index = Daru::Index.new([1,2])
1452
2494
  }.to raise_error(ArgumentError)
1453
2495
  end
2496
+
2497
+ it "is able to accept array" do
2498
+ @df.index = (1..5).to_a
2499
+ expect(@df.index).to eq Daru::Index.new (1..5).to_a
2500
+ end
1454
2501
  end
1455
2502
 
1456
2503
  context "#vectors=" do
@@ -1755,6 +2802,25 @@ describe Daru::DataFrame do
1755
2802
  ], order: agg_vectors, index: agg_index
1756
2803
  )
1757
2804
  )
2805
+
2806
+ agg_vectors = Daru::MultiIndex.from_tuples(
2807
+ [
2808
+ [:d, 'one'],
2809
+ [:d, 'two'],
2810
+ [:e, 'one'],
2811
+ [:e, 'two']
2812
+ ]
2813
+ )
2814
+ expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
2815
+ Daru::DataFrame.new(
2816
+ [
2817
+ [4.5, 5.0/3],
2818
+ [6.5, 3.0],
2819
+ [9.0, 10.0/3],
2820
+ [13.0, 6.0]
2821
+ ], order: agg_vectors, index: agg_index
2822
+ )
2823
+ )
1758
2824
  end
1759
2825
 
1760
2826
  it "overrides default aggregate function to aggregate over sum" do
@@ -1875,6 +2941,12 @@ describe Daru::DataFrame do
1875
2941
  end
1876
2942
  end
1877
2943
 
2944
+ context '#to_df' do
2945
+ it 'returns the dataframe' do
2946
+ @data_frame.to_df == @data_frame
2947
+ end
2948
+ end
2949
+
1878
2950
  context "#to_gsl" do
1879
2951
  it "converts to GSL::Matrix" do
1880
2952
  rows = [[1,2,3,4,5],[11,12,13,14,15],[11,22,33,44,55]].transpose
@@ -1904,6 +2976,16 @@ describe Daru::DataFrame do
1904
2976
 
1905
2977
  expect(ds1.merge(ds3)).to eq(exp)
1906
2978
  end
2979
+
2980
+ context "preserves type of vector names" do
2981
+ let(:df1) { Daru::DataFrame.new({'a'=> [1, 2, 3]}) }
2982
+ let(:df2) { Daru::DataFrame.new({:b=> [4, 5, 6]}) }
2983
+ subject { df1.merge df2 }
2984
+
2985
+ it { is_expected.to be_a Daru::DataFrame }
2986
+ it { expect(subject['a'].to_a).to eq [1, 2, 3] }
2987
+ it { expect(subject[:b].to_a).to eq [4, 5, 6] }
2988
+ end
1907
2989
  end
1908
2990
 
1909
2991
  context "#vector_by_calculation" do
@@ -1967,24 +3049,29 @@ describe Daru::DataFrame do
1967
3049
  end
1968
3050
  end
1969
3051
 
1970
-
1971
- context "has_missing_data?" do
1972
- before do
1973
- a1 = Daru::Vector.new [1, nil, 3, 4, 5, nil]
1974
- a2 = Daru::Vector.new [10, nil, 20, 20, 20, 30]
1975
- b1 = Daru::Vector.new [nil, nil, 1, 1, 1, 2]
1976
- b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
1977
- c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
1978
- @df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3052
+ context '#include_values?' do
3053
+ let(:df) do
3054
+ Daru::DataFrame.new({
3055
+ a: [1, 2, 3, 4, Float::NAN, 6, 1],
3056
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3057
+ c: ['a', 6, 3, 4, 3, 5, 3],
3058
+ d: [1, 2, 3, 5, 1, 2, 5]
3059
+ })
1979
3060
  end
1980
-
1981
- it "returns true when missing data present" do
1982
- expect(@df.has_missing_data?).to eq(true)
3061
+ before { df.to_category :b }
3062
+
3063
+ context 'true' do
3064
+ it { expect(df.include_values? nil).to eq true }
3065
+ it { expect(df.include_values? Float::NAN).to eq true }
3066
+ it { expect(df.include_values? nil, Float::NAN).to eq true }
3067
+ it { expect(df.include_values? 1, 30).to eq true }
1983
3068
  end
1984
-
1985
- it "returns false when no missing data prensent" do
1986
- a = @df.dup_only_valid
1987
- expect(a.has_missing_data?).to eq(false)
3069
+
3070
+ context 'false' do
3071
+ it { expect(df[:a, :c].include_values? nil).to eq false }
3072
+ it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3073
+ it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3074
+ it { expect(df.include_values? 10, 20).to eq false }
1988
3075
  end
1989
3076
  end
1990
3077
 
@@ -2076,9 +3163,13 @@ describe Daru::DataFrame do
2076
3163
 
2077
3164
  dataf = @df.verify(t3, t1, t2)
2078
3165
  expect(dataf).to eq(exp1)
3166
+ end
2079
3167
 
2080
- dataf = @df.verify(:id, t1, t2, t3)
2081
- expect(dataf).to eq(exp2)
3168
+ it "uses additional fields to extend error messages" do
3169
+ t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3170
+
3171
+ dataf = @df.verify(:id, t)
3172
+ expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
2082
3173
  end
2083
3174
  end
2084
3175
 
@@ -2169,6 +3260,10 @@ describe Daru::DataFrame do
2169
3260
  it "returns false if none of the rows satisfy the condition" do
2170
3261
  expect(@df.any?(:row) { |r| r.mean > 100 }).to eq(false)
2171
3262
  end
3263
+
3264
+ it 'fails on unknown axis' do
3265
+ expect { @df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3266
+ end
2172
3267
  end
2173
3268
 
2174
3269
  context "#all?" do
@@ -2194,6 +3289,10 @@ describe Daru::DataFrame do
2194
3289
  it "returns false if any one of the rows does not satisfy condition" do
2195
3290
  expect(@df.all?(:row) { |r| r.mean == 30 }).to eq(false)
2196
3291
  end
3292
+
3293
+ it 'fails on unknown axis' do
3294
+ expect { @df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3295
+ end
2197
3296
  end
2198
3297
 
2199
3298
  context "#only_numerics" do
@@ -2372,4 +3471,199 @@ describe Daru::DataFrame do
2372
3471
  end
2373
3472
 
2374
3473
  end
3474
+
3475
+ context '#inspect' do
3476
+ subject { df.inspect }
3477
+
3478
+ context 'empty' do
3479
+ let(:df) { Daru::DataFrame.new({}, order: %w[a b c])}
3480
+ it { is_expected.to eq %Q{
3481
+ |#<Daru::DataFrame(0x3)>
3482
+ | a b c
3483
+ }.unindent}
3484
+ end
3485
+
3486
+ context 'simple' do
3487
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: 'test')}
3488
+ it { should == %Q{
3489
+ |#<Daru::DataFrame: test (3x3)>
3490
+ | a b c
3491
+ | 0 1 3 6
3492
+ | 1 2 4 7
3493
+ | 2 3 5 8
3494
+ }.unindent}
3495
+ end
3496
+
3497
+ context 'no name' do
3498
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]})}
3499
+ it { should == %Q{
3500
+ |#<Daru::DataFrame(3x3)>
3501
+ | a b c
3502
+ | 0 1 3 6
3503
+ | 1 2 4 7
3504
+ | 2 3 5 8
3505
+ }.unindent}
3506
+ end
3507
+
3508
+ context 'with nils' do
3509
+ let(:df) { Daru::DataFrame.new({a: [1,nil,3], b: [3,4,5], c: [6,7,nil]}, name: 'test')}
3510
+ it { is_expected.to eq %Q{
3511
+ |#<Daru::DataFrame: test (3x3)>
3512
+ | a b c
3513
+ | 0 1 3 6
3514
+ | 1 nil 4 7
3515
+ | 2 3 5 nil
3516
+ }.unindent}
3517
+ end
3518
+
3519
+ context 'very long' do
3520
+ let(:df) { Daru::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
3521
+ it { is_expected.to eq %Q{
3522
+ |#<Daru::DataFrame: test (60x3)>
3523
+ | a b c
3524
+ | 0 1 1 1
3525
+ | 1 1 1 1
3526
+ | 2 1 1 1
3527
+ | 3 1 1 1
3528
+ | 4 1 1 1
3529
+ | 5 1 1 1
3530
+ | 6 1 1 1
3531
+ | 7 1 1 1
3532
+ | 8 1 1 1
3533
+ | 9 1 1 1
3534
+ | 10 1 1 1
3535
+ | 11 1 1 1
3536
+ | 12 1 1 1
3537
+ | 13 1 1 1
3538
+ | 14 1 1 1
3539
+ | ... ... ... ...
3540
+ }.unindent}
3541
+ end
3542
+
3543
+ context 'long data lines' do
3544
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [4,5,6], c: ['this is ridiculously long',nil,nil]}, name: 'test')}
3545
+ it { is_expected.to eq %Q{
3546
+ |#<Daru::DataFrame: test (3x3)>
3547
+ | a b c
3548
+ | 0 1 4 this is ri
3549
+ | 1 2 5 nil
3550
+ | 2 3 6 nil
3551
+ }.unindent}
3552
+ end
3553
+
3554
+ context 'index is a MultiIndex' do
3555
+ let(:df) {
3556
+ Daru::DataFrame.new(
3557
+ {
3558
+ a: [1,2,3,4,5,6,7],
3559
+ b: %w[a b c d e f g]
3560
+ }, index: Daru::MultiIndex.from_tuples([
3561
+ %w[foo one],
3562
+ %w[foo two],
3563
+ %w[foo three],
3564
+ %w[bar one],
3565
+ %w[bar two],
3566
+ %w[bar three],
3567
+ %w[baz one],
3568
+ ]),
3569
+ name: 'test'
3570
+ )
3571
+ }
3572
+
3573
+ it { is_expected.to eq %Q{
3574
+ |#<Daru::DataFrame: test (7x2)>
3575
+ | a b
3576
+ | foo one 1 a
3577
+ | two 2 b
3578
+ | three 3 c
3579
+ | bar one 4 d
3580
+ | two 5 e
3581
+ | three 6 f
3582
+ | baz one 7 g
3583
+ }.unindent}
3584
+ end
3585
+
3586
+ context 'vectors is a MultiIndex' do
3587
+ end
3588
+
3589
+ context 'spacing and threshold settings' do
3590
+ end
3591
+ end
3592
+
3593
+ context '#to_s' do
3594
+ it 'produces something, despite of how reasonable you think it is' do
3595
+ expect(@data_frame.to_s).to eq @data_frame.to_html
3596
+ end
3597
+ end
3598
+
3599
+ context '#to_json' do
3600
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, index: [:one, :two, :three], name: 'test')}
3601
+ subject { JSON.parse(json) }
3602
+
3603
+ context 'with index' do
3604
+ let(:json) { df.to_json(false) }
3605
+ # FIXME: is this the most reasonable thing we can do? -- zverok
3606
+ # To me, a more reasonable thing would be something like
3607
+ #
3608
+ # [
3609
+ # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
3610
+ # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
3611
+ # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
3612
+ # ]
3613
+ #
3614
+ # Or maybe
3615
+ #
3616
+ # [
3617
+ # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
3618
+ # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
3619
+ # ["three", {"a"=>3, "b"=>5, "c"=>8}]
3620
+ # ]
3621
+ #
3622
+ # Or even
3623
+ #
3624
+ # {
3625
+ # "one" => {"a"=>1, "b"=>3, "c"=>6},
3626
+ # "two" => {"a"=>2, "b"=>4, "c"=>7},
3627
+ # "three" => {"a"=>3, "b"=>5, "c"=>8}
3628
+ # }
3629
+ #
3630
+ it { is_expected.to eq(
3631
+ [
3632
+ [
3633
+ {"a"=>1, "b"=>3, "c"=>6},
3634
+ {"a"=>2, "b"=>4, "c"=>7},
3635
+ {"a"=>3, "b"=>5, "c"=>8}
3636
+ ],
3637
+ ["one", "two", "three"]
3638
+ ]
3639
+ )}
3640
+ end
3641
+
3642
+ context 'without index' do
3643
+ let(:json) { df.to_json(true) }
3644
+ it { is_expected.to eq(
3645
+ [
3646
+ {"a"=>1, "b"=>3, "c"=>6},
3647
+ {"a"=>2, "b"=>4, "c"=>7},
3648
+ {"a"=>3, "b"=>5, "c"=>8}
3649
+ ]
3650
+ )}
3651
+ end
3652
+ end
3653
+
3654
+ context '#create_sql' do
3655
+ let(:df) { Daru::DataFrame.new({
3656
+ a: [1,2,3],
3657
+ b: ['test', 'me', 'please'],
3658
+ c: ['2015-06-01', '2015-06-02', '2015-06-03']
3659
+ },
3660
+ name: 'test'
3661
+ )}
3662
+ subject { df.create_sql('foo') }
3663
+ it { is_expected.to eq %Q{
3664
+ |CREATE TABLE foo (a INTEGER,
3665
+ | b VARCHAR (255),
3666
+ | c DATE) CHARACTER SET=UTF8;
3667
+ }.unindent}
3668
+ end
2375
3669
  end if mri?