daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -1,5 +1,3 @@
1
- require 'spec_helper.rb'
2
-
3
1
  describe Daru::DataFrame do
4
2
  before :each do
5
3
  @data_frame = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
@@ -66,6 +64,33 @@ describe Daru::DataFrame do
66
64
  expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
67
65
  expect(df[:a]) .to eq(Daru::Vector.new [1,1,1,1])
68
66
  end
67
+
68
+ it 'derives index & order from arrays' do
69
+ df = Daru::DataFrame.rows @rows
70
+ expect(df.index) .to eq(Daru::Index.new [0,1,2,3])
71
+ expect(df.vectors) .to eq(Daru::Index.new %w[0 1 2 3 4])
72
+ end
73
+
74
+ it 'derives index & order from vectors' do
75
+ rows = @rows.zip(%w[w x y z]).map { |r, n| Daru::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
76
+ df = Daru::DataFrame.rows rows
77
+ expect(df.index) .to eq(Daru::Index.new %w[w x y z])
78
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
79
+ end
80
+
81
+ it 'behaves, when rows are repeated' do
82
+ rows = @rows.zip(%w[w w y z]).map { |r, n| Daru::Vector.new r, index: [:a,:b,:c,:d,:e], name: n }
83
+ df = Daru::DataFrame.rows rows
84
+ expect(df.index) .to eq(Daru::Index.new %w[w_1 w_2 y z])
85
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
86
+ end
87
+
88
+ it 'behaves, when vectors are unnamed' do
89
+ rows = @rows.map { |r| Daru::Vector.new r, index: [:a,:b,:c,:d,:e] }
90
+ df = Daru::DataFrame.rows rows
91
+ expect(df.index) .to eq(Daru::Index.new [0,1,2,3])
92
+ expect(df.vectors) .to eq(Daru::Index.new [:a,:b,:c,:d,:e])
93
+ end
69
94
  end
70
95
 
71
96
  context Daru::MultiIndex do
@@ -133,7 +158,7 @@ describe Daru::DataFrame do
133
158
  end
134
159
 
135
160
  it "initializes from a Hash of Vectors" do
136
- va = Daru::Vector.new([1,2,3,4,5], metadata: { cdc_type: 2 }, index: [:one, :two, :three, :four, :five])
161
+ va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
137
162
  vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
138
163
 
139
164
  df = Daru::DataFrame.new({ b: vb, a: va }, order: [:a, :b], index: [:one, :two, :three, :four, :five])
@@ -142,8 +167,6 @@ describe Daru::DataFrame do
142
167
  expect(df.vectors).to eq(Daru::Index.new [:a, :b])
143
168
  expect(df.a.class).to eq(Daru::Vector)
144
169
  expect(df.a) .to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
145
- expect(df.a.metadata).to eq({ cdc_type: 2 })
146
- expect(df.b.metadata).to eq({})
147
170
  end
148
171
 
149
172
  it "initializes from an Array of Hashes" do
@@ -422,13 +445,6 @@ describe Daru::DataFrame do
422
445
  expect(@df[:a, :b]).to eq(temp)
423
446
  end
424
447
 
425
- it "returns a DataFrame with metadata" do
426
- @df[:a].metadata = "alpha"
427
- @df[:b].metadata = "beta"
428
- subset_df = @df[:a, :b]
429
- expect([:a, :b].map { |v| subset_df[v].metadata }).to eq(["alpha", "beta"])
430
- end
431
-
432
448
  it "accesses vector with Integer index" do
433
449
  expect(@df[0]).to eq([1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]))
434
450
  end
@@ -440,6 +456,11 @@ describe Daru::DataFrame do
440
456
  c: [11,22,33,44,55]
441
457
  }, index: [:one, :two, :three, :four, :five]))
442
458
  end
459
+
460
+ it 'accepts axis parameter as a last argument' do
461
+ expect(@df[:a, :vector]).to eq @df[:a]
462
+ expect(@df[:one, :row]).to eq [1, 11, 11].dv(:one, [:a, :b, :c])
463
+ end
443
464
  end
444
465
 
445
466
  context Daru::MultiIndex do
@@ -540,22 +561,6 @@ describe Daru::DataFrame do
540
561
  expect(df_empty[:b].name).to equal(:b)
541
562
  end
542
563
 
543
- it "copies metadata when the target is a vector" do
544
- vec = Daru::Vector.new(1.upto(@df.size), index: @df.index, metadata: { cdc_type: 2 })
545
- @df[:woo] = vec.dup
546
- expect(@df[:woo].metadata).to eq vec.metadata
547
- end
548
-
549
- it "doesn't delete metadata when the source is a dataframe with empty vectors" do
550
- empty_df = Daru::DataFrame.new({
551
- a: Daru::Vector.new([], metadata: 'alpha'),
552
- b: Daru::Vector.new([], metadata: 'beta'),
553
- })
554
-
555
- empty_df[:c] = Daru::Vector.new(1.upto(3))
556
- expect(empty_df[:a].metadata).to eq 'alpha'
557
- end
558
-
559
564
  it "appends multiple vectors at a time" do
560
565
  # TODO
561
566
  end
@@ -606,6 +611,70 @@ describe Daru::DataFrame do
606
611
  end
607
612
  end
608
613
 
614
+ context '#method_missing' do
615
+ subject(:data_frame) {
616
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
617
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
618
+ index: [:one, :two, :three, :four, :five])
619
+ }
620
+
621
+ context 'getting the vector' do
622
+ subject{
623
+ data_frame.a
624
+ }
625
+ it { is_expected.to eq [1,2,3,4,5].dv(:a, [:one, :two, :three, :four, :five]) }
626
+ end
627
+
628
+ context 'setting existing vector' do
629
+ before{
630
+ data_frame.a = [100,200,300,400,500]
631
+ }
632
+ it { is_expected.to eq(Daru::DataFrame.new({
633
+ b: [11,12,13,14,15],
634
+ a: [100,200,300,400,500],
635
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
636
+ index: [:one, :two, :three, :four, :five]))
637
+ }
638
+ end
639
+
640
+ context 'setting new vector' do
641
+ before{
642
+ data_frame.d = [100,200,300,400,500]
643
+ }
644
+ it { is_expected.to eq(Daru::DataFrame.new({
645
+ b: [11,12,13,14,15],
646
+ a: [1,2,3,4,5],
647
+ d: [100,200,300,400,500],
648
+ c: [11,22,33,44,55]}, order: [:a, :b, :c, :d],
649
+ index: [:one, :two, :three, :four, :five]))
650
+ }
651
+ end
652
+
653
+ context 'no vector found' do
654
+ it 'should raise' do
655
+ expect { data_frame.e }.to raise_error(NoMethodError)
656
+ end
657
+ end
658
+ end
659
+
660
+ context '#add_vector' do
661
+ subject(:data_frame) {
662
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
663
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
664
+ index: [:one, :two, :three, :four, :five])
665
+ }
666
+ before {
667
+ data_frame.add_vector :a, [100,200,300,400,500]
668
+ }
669
+
670
+ it { is_expected.to eq(Daru::DataFrame.new({
671
+ b: [11,12,13,14,15],
672
+ a: [100,200,300,400,500],
673
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
674
+ index: [:one, :two, :three, :four, :five]))
675
+ }
676
+ end
677
+
609
678
  context "#row[]=" do
610
679
  context Daru::Index do
611
680
  before :each do
@@ -685,10 +754,578 @@ describe Daru::DataFrame do
685
754
 
686
755
  context Daru::MultiIndex do
687
756
  pending
757
+ # TODO
758
+ end
759
+
760
+ context Daru::CategoricalIndex do
761
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
762
+ let(:df) do
763
+ Daru::DataFrame.new({
764
+ a: 'a'..'e',
765
+ b: 1..5
766
+ }, index: idx)
767
+ end
768
+
769
+ context "modify exiting row" do
770
+ context "single category" do
771
+ subject { df }
772
+ before { df.row[:a] = ['x', 'y'] }
773
+
774
+ it { is_expected.to be_a Daru::DataFrame }
775
+ its(:index) { is_expected.to eq idx }
776
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
777
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
778
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
779
+ end
780
+
781
+ context "multiple categories" do
782
+ subject { df }
783
+ before { df.row[:a, 1] = ['x', 'y'] }
784
+
785
+ it { is_expected.to be_a Daru::DataFrame }
786
+ its(:index) { is_expected.to eq idx }
787
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
788
+ its(:'a.to_a') { is_expected.to eq ['x', 'x', 'x', 'x', 'e'] }
789
+ its(:'b.to_a') { is_expected.to eq ['y', 'y', 'y', 'y', 5] }
790
+ end
791
+
792
+ context "positional index" do
793
+ subject { df }
794
+ before { df.row[0, 2] = ['x', 'y'] }
795
+
796
+ it { is_expected.to be_a Daru::DataFrame }
797
+ its(:index) { is_expected.to eq idx }
798
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
799
+ its(:'a.to_a') { is_expected.to eq ['x', 'b', 'x', 'd', 'e'] }
800
+ its(:'b.to_a') { is_expected.to eq ['y', 2, 'y', 4, 5] }
801
+ end
802
+ end
803
+
804
+ context "add new row" do
805
+ # TODO
806
+ end
807
+ end
808
+ end
809
+
810
+ context "#row.at" do
811
+ context Daru::Index do
812
+ let(:idx) { Daru::Index.new [1, 0, :c] }
813
+ let(:df) do
814
+ Daru::DataFrame.new({
815
+ a: 1..3,
816
+ b: 'a'..'c'
817
+ }, index: idx)
818
+ end
819
+
820
+ context "single position" do
821
+ subject { df.row.at 1 }
822
+
823
+ it { is_expected.to be_a Daru::Vector }
824
+ its(:size) { is_expected.to eq 2 }
825
+ its(:to_a) { is_expected.to eq [2, 'b'] }
826
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
827
+ end
828
+
829
+ context "multiple positions" do
830
+ subject { df.row.at 0, 2 }
831
+
832
+ it { is_expected.to be_a Daru::DataFrame }
833
+ its(:size) { is_expected.to eq 2 }
834
+ its(:'index.to_a') { is_expected.to eq [1, :c] }
835
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
836
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
837
+ end
838
+
839
+ context "invalid position" do
840
+ it { expect { df.row.at 3 }.to raise_error IndexError }
841
+ end
842
+
843
+ context "invalid positions" do
844
+ it { expect { df.row.at 2, 3 }.to raise_error IndexError }
845
+ end
846
+
847
+ context "range" do
848
+ subject { df.row.at 0..1 }
849
+
850
+ it { is_expected.to be_a Daru::DataFrame }
851
+ its(:size) { is_expected.to eq 2 }
852
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
853
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
854
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
855
+ end
856
+
857
+ context "range with negative end" do
858
+ subject { df.row.at 0..-2 }
859
+
860
+ it { is_expected.to be_a Daru::DataFrame }
861
+ its(:size) { is_expected.to eq 2 }
862
+ its(:'index.to_a') { is_expected.to eq [1, 0] }
863
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
864
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
865
+ end
866
+
867
+ context "range with single element" do
868
+ subject { df.row.at 0..0 }
869
+
870
+ it { is_expected.to be_a Daru::DataFrame }
871
+ its(:size) { is_expected.to eq 1 }
872
+ its(:'index.to_a') { is_expected.to eq [1] }
873
+ its(:'a.to_a') { is_expected.to eq [1] }
874
+ its(:'b.to_a') { is_expected.to eq ['a'] }
875
+ end
876
+ end
877
+
878
+ context Daru::MultiIndex do
879
+ let (:idx) do
880
+ Daru::MultiIndex.from_tuples [
881
+ [:a,:one,:bar],
882
+ [:a,:one,:baz],
883
+ [:b,:two,:bar],
884
+ [:a,:two,:baz],
885
+ ]
886
+ end
887
+ let (:df) do
888
+ Daru::DataFrame.new({
889
+ a: 1..4,
890
+ b: 'a'..'d'
891
+ }, index: idx )
892
+ end
893
+
894
+ context "single position" do
895
+ subject { df.row.at 1 }
896
+
897
+ it { is_expected.to be_a Daru::Vector }
898
+ its(:size) { is_expected.to eq 2 }
899
+ its(:to_a) { is_expected.to eq [2, 'b'] }
900
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
901
+ end
902
+
903
+ context "multiple positions" do
904
+ subject { df.row.at 0, 2 }
905
+
906
+ it { is_expected.to be_a Daru::DataFrame }
907
+ its(:size) { is_expected.to eq 2 }
908
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
909
+ [:b, :two, :bar]] }
910
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
911
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
912
+ [:b, :two, :bar]] }
913
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
914
+ end
915
+
916
+ context "invalid position" do
917
+ it { expect { df.row.at 4 }.to raise_error IndexError }
918
+ end
919
+
920
+ context "invalid positions" do
921
+ it { expect { df.row.at 3, 4 }.to raise_error IndexError }
922
+ end
923
+
924
+ context "range" do
925
+ subject { df.row.at 0..1 }
926
+
927
+ it { is_expected.to be_a Daru::DataFrame }
928
+ its(:size) { is_expected.to eq 2 }
929
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
930
+ [:a, :one, :baz]] }
931
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
932
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
933
+ [:a, :one, :baz]] }
934
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
935
+ end
936
+
937
+ context "range with negative end" do
938
+ subject { df.row.at 0..-3 }
939
+
940
+ it { is_expected.to be_a Daru::DataFrame }
941
+ its(:size) { is_expected.to eq 2 }
942
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar],
943
+ [:a, :one, :baz]] }
944
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
945
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar],
946
+ [:a, :one, :baz]] }
947
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
948
+ end
949
+
950
+ context " range with single element" do
951
+ subject { df.row.at 0..0 }
952
+
953
+ it { is_expected.to be_a Daru::DataFrame }
954
+ its(:size) { is_expected.to eq 1 }
955
+ its(:'index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
956
+ its(:'a.to_a') { is_expected.to eq [1] }
957
+ its(:'a.index.to_a') { is_expected.to eq [[:a, :one, :bar]] }
958
+ its(:'b.to_a') { is_expected.to eq ['a'] }
959
+ end
960
+ end
961
+
962
+ context Daru::CategoricalIndex do
963
+ let (:idx) { Daru::CategoricalIndex.new [:a, 1, 1, :a, :c] }
964
+ let (:df) do
965
+ Daru::DataFrame.new({
966
+ a: 1..5,
967
+ b: 'a'..'e'
968
+ }, index: idx )
969
+ end
970
+
971
+ context "single positional index" do
972
+ subject { df.row.at 1 }
973
+
974
+ it { is_expected.to be_a Daru::Vector }
975
+ its(:size) { is_expected.to eq 2 }
976
+ its(:to_a) { is_expected.to eq [2, 'b'] }
977
+ its(:'index.to_a') { is_expected.to eq [:a, :b] }
978
+ end
979
+
980
+ context "multiple positional indexes" do
981
+ subject { df.row.at 0, 2 }
982
+
983
+ it { is_expected.to be_a Daru::DataFrame }
984
+ its(:size) { is_expected.to eq 2 }
985
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
986
+ its(:'a.to_a') { is_expected.to eq [1, 3] }
987
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
988
+ its(:'b.to_a') { is_expected.to eq ['a', 'c'] }
989
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
990
+ end
991
+
992
+ context "invalid position" do
993
+ it { expect { df.at 5 }.to raise_error IndexError }
994
+ end
995
+
996
+ context "invalid positions" do
997
+ it { expect { df.at 4, 5 }.to raise_error IndexError }
998
+ end
999
+
1000
+ context "range" do
1001
+ subject { df.row.at 0..1 }
1002
+
1003
+ it { is_expected.to be_a Daru::DataFrame }
1004
+ its(:size) { is_expected.to eq 2 }
1005
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1006
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1007
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1008
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1009
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1010
+ end
1011
+
1012
+ context "range with negative end" do
1013
+ subject { df.row.at 0..-4 }
1014
+
1015
+ it { is_expected.to be_a Daru::DataFrame }
1016
+ its(:size) { is_expected.to eq 2 }
1017
+ its(:'index.to_a') { is_expected.to eq [:a, 1] }
1018
+ its(:'a.to_a') { is_expected.to eq [1, 2] }
1019
+ its(:'a.index.to_a') { is_expected.to eq [:a, 1] }
1020
+ its(:'b.to_a') { is_expected.to eq ['a', 'b'] }
1021
+ its(:'b.index.to_a') { is_expected.to eq [:a, 1] }
1022
+ end
1023
+
1024
+ context " range with single element" do
1025
+ subject { df.row.at 0..0 }
1026
+
1027
+ it { is_expected.to be_a Daru::DataFrame }
1028
+ its(:size) { is_expected.to eq 1 }
1029
+ its(:'index.to_a') { is_expected.to eq [:a] }
1030
+ its(:'a.to_a') { is_expected.to eq [1] }
1031
+ its(:'a.index.to_a') { is_expected.to eq [:a] }
1032
+ its(:'b.to_a') { is_expected.to eq ['a'] }
1033
+ its(:'b.index.to_a') { is_expected.to eq [:a] }
1034
+ end
1035
+ end
1036
+ end
1037
+
1038
+ context "#row.set_at" do
1039
+ let(:df) do
1040
+ Daru::DataFrame.new({
1041
+ a: 1..3,
1042
+ b: 'a'..'c'
1043
+ })
1044
+ end
1045
+
1046
+ context "single position" do
1047
+ subject { df }
1048
+ before { df.row.set_at [1], ['x', 'y'] }
1049
+
1050
+ its(:size) { is_expected.to eq 3 }
1051
+ its(:'a.to_a') { is_expected.to eq [1, 'x', 3] }
1052
+ its(:'b.to_a') { is_expected.to eq ['a', 'y', 'c'] }
1053
+ end
1054
+
1055
+ context "multiple position" do
1056
+ subject { df }
1057
+ before { df.row.set_at [0, 2], ['x', 'y'] }
1058
+
1059
+ its(:size) { is_expected.to eq 3 }
1060
+ its(:'a.to_a') { is_expected.to eq ['x', 2, 'x'] }
1061
+ its(:'b.to_a') { is_expected.to eq ['y', 'b', 'y'] }
1062
+ end
1063
+
1064
+ context "invalid position" do
1065
+ it { expect { df.row.set_at [3], ['x', 'y'] }.to raise_error IndexError }
1066
+ end
1067
+
1068
+ context "invalid positions" do
1069
+ it { expect { df.row.set_at [2, 3], ['x', 'y'] }.to raise_error IndexError }
1070
+ end
1071
+
1072
+ context "incorrect size" do
1073
+ it { expect { df.row.set_at [1], ['x', 'y', 'z'] }.to raise_error SizeError }
1074
+ end
1075
+ end
1076
+
1077
+ context "#at" do
1078
+ context Daru::Index do
1079
+ let(:idx) { Daru::Index.new [:a, :b, :c] }
1080
+ let(:df) do
1081
+ Daru::DataFrame.new({
1082
+ 1 => 1..3,
1083
+ a: 'a'..'c',
1084
+ b: 11..13
1085
+ }, index: idx)
1086
+ end
1087
+
1088
+ context "single position" do
1089
+ subject { df.at 1 }
1090
+
1091
+ it { is_expected.to be_a Daru::Vector }
1092
+ its(:size) { is_expected.to eq 3 }
1093
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1094
+ its(:index) { is_expected.to eq idx }
1095
+ end
1096
+
1097
+ context "multiple positions" do
1098
+ subject { df.at 0, 2 }
1099
+
1100
+ it { is_expected.to be_a Daru::DataFrame }
1101
+ its(:shape) { is_expected.to eq [3, 2] }
1102
+ its(:index) { is_expected.to eq idx }
1103
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1104
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1105
+ end
1106
+
1107
+ context "single invalid position" do
1108
+ it { expect { df. at 3 }.to raise_error IndexError }
1109
+ end
1110
+
1111
+ context "multiple invalid positions" do
1112
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1113
+ end
1114
+
1115
+ context "range" do
1116
+ subject { df.at 0..1 }
1117
+
1118
+ it { is_expected.to be_a Daru::DataFrame }
1119
+ its(:shape) { is_expected.to eq [3, 2] }
1120
+ its(:index) { is_expected.to eq idx }
1121
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1122
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1123
+ end
1124
+
1125
+ context "range with negative end" do
1126
+ subject { df.at 0..-2 }
1127
+
1128
+ it { is_expected.to be_a Daru::DataFrame }
1129
+ its(:shape) { is_expected.to eq [3, 2] }
1130
+ its(:index) { is_expected.to eq idx }
1131
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1132
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1133
+ end
1134
+
1135
+ context "range with single element" do
1136
+ subject { df.at 1..1 }
1137
+
1138
+ it { is_expected.to be_a Daru::DataFrame }
1139
+ its(:shape) { is_expected.to eq [3, 1] }
1140
+ its(:index) { is_expected.to eq idx }
1141
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1142
+ end
1143
+ end
1144
+
1145
+ context Daru::MultiIndex do
1146
+ let (:idx) do
1147
+ Daru::MultiIndex.from_tuples [
1148
+ [:a,:one,:bar],
1149
+ [:a,:one,:baz],
1150
+ [:b,:two,:bar],
1151
+ ]
1152
+ end
1153
+ let(:df) do
1154
+ Daru::DataFrame.new({
1155
+ 1 => 1..3,
1156
+ a: 'a'..'c',
1157
+ b: 11..13
1158
+ }, index: idx)
1159
+ end
1160
+
1161
+ context "single position" do
1162
+ subject { df.at 1 }
1163
+
1164
+ it { is_expected.to be_a Daru::Vector }
1165
+ its(:size) { is_expected.to eq 3 }
1166
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1167
+ its(:index) { is_expected.to eq idx }
1168
+ end
1169
+
1170
+ context "multiple positions" do
1171
+ subject { df.at 0, 2 }
1172
+
1173
+ it { is_expected.to be_a Daru::DataFrame }
1174
+ its(:shape) { is_expected.to eq [3, 2] }
1175
+ its(:index) { is_expected.to eq idx }
1176
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1177
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1178
+ end
1179
+
1180
+ context "single invalid position" do
1181
+ it { expect { df. at 3 }.to raise_error IndexError }
1182
+ end
1183
+
1184
+ context "multiple invalid positions" do
1185
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1186
+ end
1187
+
1188
+ context "range" do
1189
+ subject { df.at 0..1 }
1190
+
1191
+ it { is_expected.to be_a Daru::DataFrame }
1192
+ its(:shape) { is_expected.to eq [3, 2] }
1193
+ its(:index) { is_expected.to eq idx }
1194
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1195
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1196
+ end
1197
+
1198
+ context "range with negative end" do
1199
+ subject { df.at 0..-2 }
1200
+
1201
+ it { is_expected.to be_a Daru::DataFrame }
1202
+ its(:shape) { is_expected.to eq [3, 2] }
1203
+ its(:index) { is_expected.to eq idx }
1204
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1205
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1206
+ end
1207
+
1208
+ context "range with single element" do
1209
+ subject { df.at 1..1 }
1210
+
1211
+ it { is_expected.to be_a Daru::DataFrame }
1212
+ its(:shape) { is_expected.to eq [3, 1] }
1213
+ its(:index) { is_expected.to eq idx }
1214
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1215
+ end
1216
+ end
1217
+
1218
+ context Daru::CategoricalIndex do
1219
+ let (:idx) { Daru::CategoricalIndex.new [:a, 1, 1] }
1220
+ let(:df) do
1221
+ Daru::DataFrame.new({
1222
+ 1 => 1..3,
1223
+ a: 'a'..'c',
1224
+ b: 11..13
1225
+ }, index: idx)
1226
+ end
1227
+
1228
+ context "single position" do
1229
+ subject { df.at 1 }
1230
+
1231
+ it { is_expected.to be_a Daru::Vector }
1232
+ its(:size) { is_expected.to eq 3 }
1233
+ its(:to_a) { is_expected.to eq ['a', 'b', 'c'] }
1234
+ its(:index) { is_expected.to eq idx }
1235
+ end
1236
+
1237
+ context "multiple positions" do
1238
+ subject { df.at 0, 2 }
1239
+
1240
+ it { is_expected.to be_a Daru::DataFrame }
1241
+ its(:shape) { is_expected.to eq [3, 2] }
1242
+ its(:index) { is_expected.to eq idx }
1243
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1244
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1245
+ end
1246
+
1247
+ context "single invalid position" do
1248
+ it { expect { df. at 3 }.to raise_error IndexError }
1249
+ end
1250
+
1251
+ context "multiple invalid positions" do
1252
+ it { expect { df.at 2, 3 }.to raise_error IndexError }
1253
+ end
1254
+
1255
+ context "range" do
1256
+ subject { df.at 0..1 }
1257
+
1258
+ it { is_expected.to be_a Daru::DataFrame }
1259
+ its(:shape) { is_expected.to eq [3, 2] }
1260
+ its(:index) { is_expected.to eq idx }
1261
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1262
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1263
+ end
1264
+
1265
+ context "range with negative index" do
1266
+ subject { df.at 0..-2 }
1267
+
1268
+ it { is_expected.to be_a Daru::DataFrame }
1269
+ its(:shape) { is_expected.to eq [3, 2] }
1270
+ its(:index) { is_expected.to eq idx }
1271
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1272
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1273
+ end
1274
+
1275
+ context "range with single element" do
1276
+ subject { df.at 1..1 }
1277
+
1278
+ it { is_expected.to be_a Daru::DataFrame }
1279
+ its(:shape) { is_expected.to eq [3, 1] }
1280
+ its(:index) { is_expected.to eq idx }
1281
+ its(:'a.to_a') { is_expected.to eq ['a', 'b', 'c'] }
1282
+ end
1283
+ end
1284
+ end
1285
+
1286
+ context "#set_at" do
1287
+ let(:df) do
1288
+ Daru::DataFrame.new({
1289
+ 1 => 1..3,
1290
+ a: 'a'..'c',
1291
+ b: 11..13
1292
+ })
1293
+ end
1294
+
1295
+ context "single position" do
1296
+ subject { df }
1297
+ before { df.set_at [1], ['x', 'y', 'z'] }
1298
+
1299
+ its(:shape) { is_expected.to eq [3, 3] }
1300
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1301
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1302
+ its(:'b.to_a') { is_expected.to eq [11, 12, 13] }
1303
+ end
1304
+
1305
+ context "multiple position" do
1306
+ subject { df }
1307
+ before { df.set_at [1, 2], ['x', 'y', 'z'] }
1308
+
1309
+ its(:shape) { is_expected.to eq [3, 3] }
1310
+ it { expect(df[1].to_a).to eq [1, 2, 3] }
1311
+ its(:'a.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1312
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'z'] }
1313
+ end
1314
+
1315
+ context "invalid position" do
1316
+ it { expect { df.set_at [3], ['x', 'y', 'z'] }.to raise_error IndexError }
1317
+ end
1318
+
1319
+ context "invalid positions" do
1320
+ it { expect { df.set_at [2, 3], ['x', 'y', 'z'] }.to raise_error IndexError }
1321
+ end
1322
+
1323
+ context "incorrect size" do
1324
+ it { expect { df.set_at [1], ['x', 'y'] }.to raise_error SizeError }
688
1325
  end
689
1326
  end
690
1327
 
691
- context "#row" do
1328
+ context "#row[]" do
692
1329
  context Daru::Index do
693
1330
  before :each do
694
1331
  @df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
@@ -741,7 +1378,7 @@ describe Daru::DataFrame do
741
1378
  expect(@df_mi.row[0]).to eq(Daru::Vector.new([11,1,11,1], index: @order_mi))
742
1379
  end
743
1380
 
744
- it "returns a DataFrame when specifying numeric range" do
1381
+ it "returns a DataFrame whecn specifying numeric range" do
745
1382
  sub_index = Daru::MultiIndex.from_tuples([
746
1383
  [:a,:one,:bar],
747
1384
  [:a,:one,:baz]
@@ -787,9 +1424,97 @@ describe Daru::DataFrame do
787
1424
  ], index: sub_index, order: @order_mi))
788
1425
  end
789
1426
  end
1427
+
1428
+ context Daru::CategoricalIndex do
1429
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
1430
+ let(:df) do
1431
+ Daru::DataFrame.new({
1432
+ a: 'a'..'e',
1433
+ b: 1..5
1434
+ }, index: idx)
1435
+ end
1436
+
1437
+ context "single category" do
1438
+ context "multiple instances" do
1439
+ subject { df.row[:a] }
1440
+
1441
+ it { is_expected.to be_a Daru::DataFrame }
1442
+ its(:index) { is_expected.to eq Daru::CategoricalIndex.new [:a, :a] }
1443
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
1444
+ its(:a) { Daru::Vector.new ['a', 'c'] }
1445
+ its(:b) { Daru::Vector.new [1, 3] }
1446
+ end
1447
+
1448
+ context "single instance" do
1449
+ subject { df.row[:c] }
1450
+
1451
+ it { is_expected.to be_a Daru::Vector }
1452
+ its(:index) { is_expected.to eq Daru::Index.new [:a, :b] }
1453
+ its(:to_a) { is_expected.to eq ['e', 5] }
1454
+ end
1455
+ end
1456
+
1457
+ context "multiple categories" do
1458
+ subject { df.row[:a, 1] }
1459
+
1460
+ it { is_expected.to be_a Daru::DataFrame }
1461
+ its(:index) { is_expected.to eq Daru::CategoricalIndex.new(
1462
+ [:a, 1, :a, 1 ]) }
1463
+ its(:vectors) { is_expected.to eq Daru::Index.new [:a, :b] }
1464
+ its(:a) { Daru::Vector.new ['a', 'c', 'b', 'd'] }
1465
+ its(:b) { Daru::Vector.new [1, 3, 2, 4] }
1466
+ end
1467
+
1468
+ context "positional index" do
1469
+ subject { df.row[0] }
1470
+
1471
+ it { is_expected.to be_a Daru::Vector }
1472
+ its(:index) { is_expected.to eq Daru::Index.new [:a, :b] }
1473
+ its(:to_a) { is_expected.to eq ['a', 1] }
1474
+ end
1475
+
1476
+ context "invalid positional index" do
1477
+ it { expect { df.row[5] }.to raise_error IndexError }
1478
+ end
1479
+
1480
+ context "invalid category" do
1481
+ it { expect { df.row[:d] }.to raise_error IndexError }
1482
+ end
1483
+ end
790
1484
  end
791
1485
 
792
1486
  context "#add_row" do
1487
+ subject(:data_frame) {
1488
+ Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
1489
+ c: [11,22,33,44,55]}, order: [:a, :b, :c],
1490
+ index: [:one, :two, :three, :four, :five])
1491
+ }
1492
+ context 'named' do
1493
+ before {
1494
+ data_frame.add_row [100,200,300], :six
1495
+ }
1496
+
1497
+ it { is_expected.to eq(Daru::DataFrame.new({
1498
+ a: [1,2,3,4,5,100],
1499
+ b: [11,12,13,14,15,200],
1500
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1501
+ index: [:one, :two, :three, :four, :five, :six]))
1502
+ }
1503
+ end
1504
+
1505
+ context 'unnamed' do
1506
+ before {
1507
+ data_frame.add_row [100,200,300]
1508
+ }
1509
+
1510
+ it { is_expected.to eq(Daru::DataFrame.new({
1511
+ a: [1,2,3,4,5,100],
1512
+ b: [11,12,13,14,15,200],
1513
+ c: [11,22,33,44,55,300]}, order: [:a, :b, :c],
1514
+ index: [:one, :two, :three, :four, :five, 5]))
1515
+ }
1516
+ end
1517
+
793
1518
  it "allows adding rows after making empty DF by specfying only order" do
794
1519
  df = Daru::DataFrame.new({}, order: [:a, :b, :c])
795
1520
  df.add_row [1,2,3]
@@ -802,6 +1527,40 @@ describe Daru::DataFrame do
802
1527
  end
803
1528
  end
804
1529
 
1530
+ context "#first" do
1531
+ it 'works' do
1532
+ expect(@data_frame.first(2)).to eq(
1533
+ Daru::DataFrame.new({b: [11,12], a: [1,2], c: [11,22]},
1534
+ order: [:a, :b, :c],
1535
+ index: [:one, :two]))
1536
+ end
1537
+
1538
+ it 'works with too large values' do
1539
+ expect(@data_frame.first(200)).to eq(@data_frame)
1540
+ end
1541
+
1542
+ it 'has synonym' do
1543
+ expect(@data_frame.first(2)).to eq(@data_frame.head(2))
1544
+ end
1545
+ end
1546
+
1547
+ context "#last" do
1548
+ it 'works' do
1549
+ expect(@data_frame.last(2)).to eq(
1550
+ Daru::DataFrame.new({b: [14,15], a: [4,5], c: [44,55]},
1551
+ order: [:a, :b, :c],
1552
+ index: [:four, :five]))
1553
+ end
1554
+
1555
+ it 'works with too large values' do
1556
+ expect(@data_frame.last(200)).to eq(@data_frame)
1557
+ end
1558
+
1559
+ it 'has synonym' do
1560
+ expect(@data_frame.last(2)).to eq(@data_frame.tail(2))
1561
+ end
1562
+ end
1563
+
805
1564
  context "#==" do
806
1565
  it "compares by vectors, index and values of a DataFrame (ignores name)" do
807
1566
  a = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
@@ -814,6 +1573,13 @@ describe Daru::DataFrame do
814
1573
  end
815
1574
  end
816
1575
 
1576
+ context '#rename' do
1577
+ subject { @data_frame.rename 'other' }
1578
+
1579
+ it { is_expected.to be_a Daru::DataFrame }
1580
+ its(:name) { is_expected.to eq 'other' }
1581
+ end
1582
+
817
1583
  context "#dup" do
818
1584
  context Daru::Index do
819
1585
  it "dups every data structure inside DataFrame" do
@@ -841,31 +1607,122 @@ describe Daru::DataFrame do
841
1607
  end
842
1608
  end
843
1609
 
844
- context "#dup_only_valid" do
845
- before do
846
- @missing_data_df = Daru::DataFrame.new({
847
- a: [1 , 2, 3, nil, 4, nil, 5],
848
- b: [nil, 2, 3, nil, 4, nil, 5],
849
- c: [1, 2, 3, 43 , 4, nil, 5]
1610
+ context '#reject_values' do
1611
+ let(:df) do
1612
+ Daru::DataFrame.new({
1613
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1614
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1615
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
1616
+ }, index: 11..18)
1617
+ end
1618
+ before { df.to_category :b }
1619
+
1620
+ context 'remove nils only' do
1621
+ subject { df.reject_values nil }
1622
+ it { is_expected.to be_a Daru::DataFrame }
1623
+ its(:'b.type') { is_expected.to eq :category }
1624
+ its(:'a.to_a') { is_expected.to eq [1, 2, 7] }
1625
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 8] }
1626
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 7] }
1627
+ its(:'index.to_a') { is_expected.to eq [11, 12, 18] }
1628
+ end
1629
+
1630
+ context 'remove Float::NAN only' do
1631
+ subject { df.reject_values Float::NAN }
1632
+ it { is_expected.to be_a Daru::DataFrame }
1633
+ its(:'b.type') { is_expected.to eq :category }
1634
+ its(:'a.to_a') { is_expected.to eq [1, 3, nil, 1, 7] }
1635
+ its(:'b.to_a') { is_expected.to eq [:a, nil, 3, 5, 8] }
1636
+ its(:'c.to_a') { is_expected.to eq ['a', 3, 5, nil, 7] }
1637
+ its(:'index.to_a') { is_expected.to eq [11, 13, 16, 17, 18] }
1638
+ end
1639
+
1640
+ context 'remove both nil and Float::NAN' do
1641
+ subject { df.reject_values nil, Float::NAN }
1642
+ it { is_expected.to be_a Daru::DataFrame }
1643
+ its(:'b.type') { is_expected.to eq :category }
1644
+ its(:'a.to_a') { is_expected.to eq [1, 7] }
1645
+ its(:'b.to_a') { is_expected.to eq [:a, 8] }
1646
+ its(:'c.to_a') { is_expected.to eq ['a', 7] }
1647
+ its(:'index.to_a') { is_expected.to eq [11, 18] }
1648
+ end
1649
+
1650
+ context 'any other values' do
1651
+ subject { df.reject_values 1, 5 }
1652
+ it { is_expected.to be_a Daru::DataFrame }
1653
+ its(:'b.type') { is_expected.to eq :category }
1654
+ its(:'a.to_a') { is_expected.to eq [2, 3, nil, Float::NAN, 7] }
1655
+ its(:'b.to_a') { is_expected.to eq [:b, nil, Float::NAN, nil, 8] }
1656
+ its(:'c.to_a') { is_expected.to eq [Float::NAN, 3, 4, 3, 7] }
1657
+ its(:'index.to_a') { is_expected.to eq [12, 13, 14, 15, 18] }
1658
+ end
1659
+
1660
+ context 'when resultant dataframe has one row' do
1661
+ subject { df.reject_values 1, 2, 3, 4, 5, nil, Float::NAN }
1662
+ it { is_expected.to be_a Daru::DataFrame }
1663
+ its(:'b.type') { is_expected.to eq :category }
1664
+ its(:'a.to_a') { is_expected.to eq [7] }
1665
+ its(:'b.to_a') { is_expected.to eq [8] }
1666
+ its(:'c.to_a') { is_expected.to eq [7] }
1667
+ its(:'index.to_a') { is_expected.to eq [18] }
1668
+ end
1669
+
1670
+ context 'when resultant dataframe is empty' do
1671
+ subject { df.reject_values 1, 2, 3, 4, 5, 6, 7, nil, Float::NAN }
1672
+ it { is_expected.to be_a Daru::DataFrame }
1673
+ its(:'b.type') { is_expected.to eq :category }
1674
+ its(:'a.to_a') { is_expected.to eq [] }
1675
+ its(:'b.to_a') { is_expected.to eq [] }
1676
+ its(:'c.to_a') { is_expected.to eq [] }
1677
+ its(:'index.to_a') { is_expected.to eq [] }
1678
+ end
1679
+ end
1680
+
1681
+ context '#replace_values' do
1682
+ subject do
1683
+ Daru::DataFrame.new({
1684
+ a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
1685
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5, 8],
1686
+ c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
850
1687
  })
851
1688
  end
852
-
853
- it "dups rows with non-missing data only" do
854
- df = Daru::DataFrame.new({
855
- a: [2, 3, 4, 5],
856
- b: [2, 3, 4, 5],
857
- c: [2, 3, 4, 5]
858
- }, index: [1,2,4,6])
859
- expect(@missing_data_df.dup_only_valid).to eq(df)
860
- end
861
-
862
- it "dups only the specified vectors" do
863
- df = Daru::DataFrame.new({
864
- a: [2,3,4,5],
865
- c: [2,3,4,5]
866
- }, index: [1,2,4,6])
867
- expect(@missing_data_df.dup_only_valid([:a, :c])).to eq(df)
868
- end
1689
+ before { subject.to_category :b }
1690
+
1691
+ context 'replace nils only' do
1692
+ before { subject.replace_values nil, 10 }
1693
+ it { is_expected.to be_a Daru::DataFrame }
1694
+ its(:'b.type') { is_expected.to eq :category }
1695
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, Float::NAN, 10, 1, 7] }
1696
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, Float::NAN, 10, 3, 5, 8] }
1697
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 5, 10, 7] }
1698
+ end
1699
+
1700
+ context 'replace Float::NAN only' do
1701
+ before { subject.replace_values Float::NAN, 10 }
1702
+ it { is_expected.to be_a Daru::DataFrame }
1703
+ its(:'b.type') { is_expected.to eq :category }
1704
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, nil, 10, nil, 1, 7] }
1705
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, 10, nil, 3, 5, 8] }
1706
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, nil, 7] }
1707
+ end
1708
+
1709
+ context 'replace both nil and Float::NAN' do
1710
+ before { subject.replace_values [nil, Float::NAN], 10 }
1711
+ it { is_expected.to be_a Daru::DataFrame }
1712
+ its(:'b.type') { is_expected.to eq :category }
1713
+ its(:'a.to_a') { is_expected.to eq [1, 2, 3, 10, 10, 10, 1, 7] }
1714
+ its(:'b.to_a') { is_expected.to eq [:a, :b, 10, 10, 10, 3, 5, 8] }
1715
+ its(:'c.to_a') { is_expected.to eq ['a', 10, 3, 4, 3, 5, 10, 7] }
1716
+ end
1717
+
1718
+ context 'replace other values' do
1719
+ before { subject.replace_values [1, 5], 10 }
1720
+ it { is_expected.to be_a Daru::DataFrame }
1721
+ its(:'b.type') { is_expected.to eq :category }
1722
+ its(:'a.to_a') { is_expected.to eq [10, 2, 3, nil, Float::NAN, nil, 10, 7] }
1723
+ its(:'b.to_a') { is_expected.to eq [:a, :b, nil, Float::NAN, nil, 3, 10, 8] }
1724
+ its(:'c.to_a') { is_expected.to eq ['a', Float::NAN, 3, 4, 3, 10, nil, 7] }
1725
+ end
869
1726
  end
870
1727
 
871
1728
  context "#clone" do
@@ -902,6 +1759,27 @@ describe Daru::DataFrame do
902
1759
 
903
1760
  end
904
1761
 
1762
+ context "#clone_only_valid" do
1763
+ let(:df_with_missing) {
1764
+ Daru::DataFrame.new({
1765
+ a: [1 , 2, 3, nil, 4, nil, 5],
1766
+ b: [nil, 2, 3, nil, 4, nil, 5],
1767
+ c: [1, 2, 3, 43 , 4, nil, 5]
1768
+ })
1769
+ }
1770
+
1771
+ let(:df_without_missing) {
1772
+ Daru::DataFrame.new({
1773
+ a: [2,3,4,5],
1774
+ c: [2,3,4,5]
1775
+ })
1776
+ }
1777
+ it 'does the most reasonable thing' do
1778
+ expect(df_with_missing.clone_only_valid).to eq(df_with_missing.reject_values(*Daru::MISSING_VALUES))
1779
+ expect(df_without_missing.clone_only_valid).to eq(df_without_missing.clone)
1780
+ end
1781
+ end
1782
+
905
1783
  context "#clone_structure" do
906
1784
  it "clones only the index and vector structures of the data frame" do
907
1785
  cs = @data_frame.clone_structure
@@ -912,6 +1790,19 @@ describe Daru::DataFrame do
912
1790
  end
913
1791
  end
914
1792
 
1793
+ context "#each_index" do
1794
+ it "iterates over index" do
1795
+ idxs = []
1796
+ ret = @data_frame.each_index do |index|
1797
+ idxs << index
1798
+ end
1799
+
1800
+ expect(idxs).to eq([:one, :two, :three, :four, :five])
1801
+
1802
+ expect(ret).to eq(@data_frame)
1803
+ end
1804
+ end
1805
+
915
1806
  context "#each_vector_with_index" do
916
1807
  it "iterates over vectors with index" do
917
1808
  idxs = []
@@ -964,6 +1855,10 @@ describe Daru::DataFrame do
964
1855
  ret = @data_frame.each
965
1856
  expect(ret.is_a?(Enumerator)).to eq(true)
966
1857
  end
1858
+
1859
+ it "raises on unknown axis" do
1860
+ expect { @data_frame.each(:kitten) }.to raise_error(ArgumentError, /axis/)
1861
+ end
967
1862
  end
968
1863
 
969
1864
  context "#recode" do
@@ -1108,8 +2003,23 @@ describe Daru::DataFrame do
1108
2003
  end
1109
2004
  end
1110
2005
 
2006
+ # FIXME: collect_VECTORS_with_index, but map_VECTOR_with_index -- ??? -- zverok
2007
+ # (Not saying about unfortunate difference between them...)
2008
+ context "#collect_vector_with_index" do
2009
+ it "iterates over vectors with index and returns an Array" do
2010
+ idx = []
2011
+ ret = @data_frame.collect_vector_with_index do |vector, index|
2012
+ idx << index
2013
+ vector.sum
2014
+ end
2015
+
2016
+ expect(ret).to eq(Daru::Vector.new([15, 65, 165], index: [:a, :b, :c]))
2017
+ expect(idx).to eq([:a, :b, :c])
2018
+ end
2019
+ end
2020
+
1111
2021
  context "#map_rows_with_index" do
1112
- it "iterates over rows with index and returns a modified DataFrame" do
2022
+ it "iterates over rows with index and returns an Array" do
1113
2023
  idx = []
1114
2024
  ret = @data_frame.map_rows_with_index do |row, index|
1115
2025
  idx << index
@@ -1122,6 +2032,21 @@ describe Daru::DataFrame do
1122
2032
  end
1123
2033
  end
1124
2034
 
2035
+ context '#collect_row_with_index' do
2036
+ it "iterates over rows with index and returns a Vector" do
2037
+ idx = []
2038
+ ret = @data_frame.collect_row_with_index do |row, index|
2039
+ idx << index
2040
+ expect(row.class).to eq(Daru::Vector)
2041
+ row[:a] * row[:c]
2042
+ end
2043
+
2044
+ expected = Daru::Vector.new([11, 44, 99, 176, 275], index: @data_frame.index)
2045
+ expect(ret).to eq(expected)
2046
+ expect(idx).to eq([:one, :two, :three, :four, :five])
2047
+ end
2048
+ end
2049
+
1125
2050
  context "#delete_vector" do
1126
2051
  context Daru::Index do
1127
2052
  it "deletes the specified vector" do
@@ -1208,6 +2133,13 @@ describe Daru::DataFrame do
1208
2133
 
1209
2134
  expect(a).to eq(Daru::DataFrame.new({a: [2], b: [3]}, order: [:a, :b], index: [1]))
1210
2135
  end
2136
+
2137
+ it "preserves names of vectors" do
2138
+ df = Daru::DataFrame.new a: 1..3, b: 4..6
2139
+ df1 = df.filter_rows { |r| r[:a] != 2 }
2140
+
2141
+ expect(df1[:a].name).to eq(df[:a].name)
2142
+ end
1211
2143
  end
1212
2144
  end
1213
2145
 
@@ -1225,6 +2157,19 @@ describe Daru::DataFrame do
1225
2157
  end
1226
2158
  end
1227
2159
 
2160
+ context "#filter" do
2161
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [2,3,4]}) }
2162
+ it "dispatches" do
2163
+ expect(df.filter(:row){|r| r[:a] % 2 == 0 }).to \
2164
+ eq df.filter_rows{|r| r[:a] % 2 == 0 }
2165
+
2166
+ expect(df.filter(:vector){|v| v[0] == 1}).to \
2167
+ eq df.filter_vectors{|v| v[0] == 1}
2168
+
2169
+ expect { df.filter(:kitten){} }.to raise_error ArgumentError, /axis/
2170
+ end
2171
+ end
2172
+
1228
2173
  context "#to_a" do
1229
2174
  context Daru::Index do
1230
2175
  it "converts DataFrame into array of hashes" do
@@ -1280,7 +2225,6 @@ describe Daru::DataFrame do
1280
2225
  context Daru::Index do
1281
2226
  before :each do
1282
2227
  @df = Daru::DataFrame.new({a: [5,1,-6,7,5,5], b: [-2,-1,5,3,9,1], c: ['a','aa','aaa','aaaa','aaaaa','aaaaaa']})
1283
- @df[:a].metadata = { cdc_type: 2 }
1284
2228
  end
1285
2229
 
1286
2230
  it "sorts according to given vector order (bang)" do
@@ -1302,17 +2246,115 @@ describe Daru::DataFrame do
1302
2246
  )
1303
2247
  expect(ans).to_not eq(@df)
1304
2248
  end
1305
-
1306
- it "retains the vector metadata from the original dataframe" do
1307
- ans = @df.sort([:a])
1308
- expect(ans[:a].metadata).to eq({ cdc_type: 2 })
1309
- end
1310
-
1311
2249
  end
1312
2250
 
1313
2251
  context Daru::MultiIndex do
1314
2252
  pending
1315
2253
  end
2254
+
2255
+ context Daru::CategoricalIndex do
2256
+ let(:idx) { Daru::CategoricalIndex.new [:a, 1, :a, 1, :c] }
2257
+ let(:df) do
2258
+ Daru::DataFrame.new({
2259
+ a: [2, -1, 3, 4, 5],
2260
+ b: ['x', 'y', 'x', 'a', 'y'],
2261
+ c: [nil, nil, -2, 2, 1]
2262
+ }, index: idx)
2263
+ end
2264
+
2265
+ context "ascending order" do
2266
+ context "single vector" do
2267
+ subject { df.sort [:a] }
2268
+
2269
+ its(:'index.to_a') { is_expected.to eq [1, :a, :a, 1, :c] }
2270
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 3, 4, 5] }
2271
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'x', 'a', 'y'] }
2272
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2273
+ end
2274
+
2275
+ context "multiple vectors" do
2276
+ subject { df.sort [:c, :b] }
2277
+
2278
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, :c, 1] }
2279
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 5, 4] }
2280
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'y', 'a'] }
2281
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 1, 2] }
2282
+ end
2283
+
2284
+ context "block" do
2285
+ context "automatic handle nils" do
2286
+ subject do
2287
+ df.sort [:c], by: {c: lambda { |a| a.abs } }, handle_nils: true
2288
+ end
2289
+
2290
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :c, :a, 1] }
2291
+ its(:'a.to_a') { is_expected.to eq [2, -1, 5, 3, 4] }
2292
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'y', 'x', 'a'] }
2293
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 1, -2, 2] }
2294
+ end
2295
+
2296
+ context "manually handle nils" do
2297
+ subject do
2298
+ df.sort [:c], by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } }
2299
+ end
2300
+
2301
+ its(:'index.to_a') { is_expected.to eq [:c, :a, 1, :a, 1] }
2302
+ its(:'a.to_a') { is_expected.to eq [5, 3, 4, 2, -1] }
2303
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'x', 'y'] }
2304
+ its(:'c.to_a') { is_expected.to eq [1, -2, 2, nil, nil] }
2305
+ end
2306
+ end
2307
+ end
2308
+
2309
+ context "descending order" do
2310
+ context "single vector" do
2311
+ subject { df.sort [:a], ascending: false }
2312
+
2313
+ its(:'index.to_a') { is_expected.to eq [:c, 1, :a, :a, 1] }
2314
+ its(:'a.to_a') { is_expected.to eq [5, 4, 3, 2, -1] }
2315
+ its(:'b.to_a') { is_expected.to eq ['y', 'a', 'x', 'x', 'y'] }
2316
+ its(:'c.to_a') { is_expected.to eq [1, 2, -2, nil, nil] }
2317
+ end
2318
+
2319
+ context "multiple vectors" do
2320
+ subject { df.sort [:c, :b], ascending: false }
2321
+
2322
+ its(:'index.to_a') { is_expected.to eq [1, :a, 1, :c, :a] }
2323
+ its(:'a.to_a') { is_expected.to eq [-1, 2, 4, 5, 3] }
2324
+ its(:'b.to_a') { is_expected.to eq ['y', 'x', 'a', 'y', 'x'] }
2325
+ its(:'c.to_a') { is_expected.to eq [nil, nil, 2, 1, -2] }
2326
+ end
2327
+
2328
+ context "block" do
2329
+ context "automatic handle nils" do
2330
+ subject do
2331
+ df.sort [:c],
2332
+ by: {c: lambda { |a| a.abs } },
2333
+ handle_nils: true,
2334
+ ascending: false
2335
+ end
2336
+
2337
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2338
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2339
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2340
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2341
+ end
2342
+
2343
+ context "manually handle nils" do
2344
+ subject do
2345
+ df.sort [:c],
2346
+ by: {c: lambda { |a| (a.nil?)?[1]:[0,a.abs] } },
2347
+ ascending: false
2348
+ end
2349
+
2350
+ its(:'index.to_a') { is_expected.to eq [:a, 1, :a, 1, :c] }
2351
+ its(:'a.to_a') { is_expected.to eq [2, -1, 3, 4, 5] }
2352
+ its(:'b.to_a') { is_expected.to eq ['x', 'y', 'x', 'a', 'y'] }
2353
+ its(:'c.to_a') { is_expected.to eq [nil, nil, -2, 2, 1] }
2354
+ end
2355
+ end
2356
+ end
2357
+ end
1316
2358
  end
1317
2359
 
1318
2360
  context "#sort!" do
@@ -1451,6 +2493,11 @@ describe Daru::DataFrame do
1451
2493
  @df.index = Daru::Index.new([1,2])
1452
2494
  }.to raise_error(ArgumentError)
1453
2495
  end
2496
+
2497
+ it "is able to accept array" do
2498
+ @df.index = (1..5).to_a
2499
+ expect(@df.index).to eq Daru::Index.new (1..5).to_a
2500
+ end
1454
2501
  end
1455
2502
 
1456
2503
  context "#vectors=" do
@@ -1755,6 +2802,25 @@ describe Daru::DataFrame do
1755
2802
  ], order: agg_vectors, index: agg_index
1756
2803
  )
1757
2804
  )
2805
+
2806
+ agg_vectors = Daru::MultiIndex.from_tuples(
2807
+ [
2808
+ [:d, 'one'],
2809
+ [:d, 'two'],
2810
+ [:e, 'one'],
2811
+ [:e, 'two']
2812
+ ]
2813
+ )
2814
+ expect(@df.pivot_table(index: [:a], vectors: [:b], values: [:d, :e])).to eq(
2815
+ Daru::DataFrame.new(
2816
+ [
2817
+ [4.5, 5.0/3],
2818
+ [6.5, 3.0],
2819
+ [9.0, 10.0/3],
2820
+ [13.0, 6.0]
2821
+ ], order: agg_vectors, index: agg_index
2822
+ )
2823
+ )
1758
2824
  end
1759
2825
 
1760
2826
  it "overrides default aggregate function to aggregate over sum" do
@@ -1875,6 +2941,12 @@ describe Daru::DataFrame do
1875
2941
  end
1876
2942
  end
1877
2943
 
2944
+ context '#to_df' do
2945
+ it 'returns the dataframe' do
2946
+ @data_frame.to_df == @data_frame
2947
+ end
2948
+ end
2949
+
1878
2950
  context "#to_gsl" do
1879
2951
  it "converts to GSL::Matrix" do
1880
2952
  rows = [[1,2,3,4,5],[11,12,13,14,15],[11,22,33,44,55]].transpose
@@ -1904,6 +2976,16 @@ describe Daru::DataFrame do
1904
2976
 
1905
2977
  expect(ds1.merge(ds3)).to eq(exp)
1906
2978
  end
2979
+
2980
+ context "preserves type of vector names" do
2981
+ let(:df1) { Daru::DataFrame.new({'a'=> [1, 2, 3]}) }
2982
+ let(:df2) { Daru::DataFrame.new({:b=> [4, 5, 6]}) }
2983
+ subject { df1.merge df2 }
2984
+
2985
+ it { is_expected.to be_a Daru::DataFrame }
2986
+ it { expect(subject['a'].to_a).to eq [1, 2, 3] }
2987
+ it { expect(subject[:b].to_a).to eq [4, 5, 6] }
2988
+ end
1907
2989
  end
1908
2990
 
1909
2991
  context "#vector_by_calculation" do
@@ -1967,24 +3049,29 @@ describe Daru::DataFrame do
1967
3049
  end
1968
3050
  end
1969
3051
 
1970
-
1971
- context "has_missing_data?" do
1972
- before do
1973
- a1 = Daru::Vector.new [1, nil, 3, 4, 5, nil]
1974
- a2 = Daru::Vector.new [10, nil, 20, 20, 20, 30]
1975
- b1 = Daru::Vector.new [nil, nil, 1, 1, 1, 2]
1976
- b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
1977
- c = Daru::Vector.new [nil, 2, 4, 2, 2, 2]
1978
- @df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2, :c => c })
3052
+ context '#include_values?' do
3053
+ let(:df) do
3054
+ Daru::DataFrame.new({
3055
+ a: [1, 2, 3, 4, Float::NAN, 6, 1],
3056
+ b: [:a, :b, nil, Float::NAN, nil, 3, 5],
3057
+ c: ['a', 6, 3, 4, 3, 5, 3],
3058
+ d: [1, 2, 3, 5, 1, 2, 5]
3059
+ })
1979
3060
  end
1980
-
1981
- it "returns true when missing data present" do
1982
- expect(@df.has_missing_data?).to eq(true)
3061
+ before { df.to_category :b }
3062
+
3063
+ context 'true' do
3064
+ it { expect(df.include_values? nil).to eq true }
3065
+ it { expect(df.include_values? Float::NAN).to eq true }
3066
+ it { expect(df.include_values? nil, Float::NAN).to eq true }
3067
+ it { expect(df.include_values? 1, 30).to eq true }
1983
3068
  end
1984
-
1985
- it "returns false when no missing data prensent" do
1986
- a = @df.dup_only_valid
1987
- expect(a.has_missing_data?).to eq(false)
3069
+
3070
+ context 'false' do
3071
+ it { expect(df[:a, :c].include_values? nil).to eq false }
3072
+ it { expect(df[:c, :d].include_values? Float::NAN).to eq false }
3073
+ it { expect(df[:c, :d].include_values? nil, Float::NAN).to eq false }
3074
+ it { expect(df.include_values? 10, 20).to eq false }
1988
3075
  end
1989
3076
  end
1990
3077
 
@@ -2076,9 +3163,13 @@ describe Daru::DataFrame do
2076
3163
 
2077
3164
  dataf = @df.verify(t3, t1, t2)
2078
3165
  expect(dataf).to eq(exp1)
3166
+ end
2079
3167
 
2080
- dataf = @df.verify(:id, t1, t2, t3)
2081
- expect(dataf).to eq(exp2)
3168
+ it "uses additional fields to extend error messages" do
3169
+ t = create_test("v4='b'", :v2, :v3) { |r| r[:v4] == 'b' }
3170
+
3171
+ dataf = @df.verify(:id, t)
3172
+ expect(dataf).to eq(["1 [r1]: v4='b' (v2=4, v3=10)", "3 [r3]: v4='b' (v2=2, v3=30)"])
2082
3173
  end
2083
3174
  end
2084
3175
 
@@ -2169,6 +3260,10 @@ describe Daru::DataFrame do
2169
3260
  it "returns false if none of the rows satisfy the condition" do
2170
3261
  expect(@df.any?(:row) { |r| r.mean > 100 }).to eq(false)
2171
3262
  end
3263
+
3264
+ it 'fails on unknown axis' do
3265
+ expect { @df.any?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3266
+ end
2172
3267
  end
2173
3268
 
2174
3269
  context "#all?" do
@@ -2194,6 +3289,10 @@ describe Daru::DataFrame do
2194
3289
  it "returns false if any one of the rows does not satisfy condition" do
2195
3290
  expect(@df.all?(:row) { |r| r.mean == 30 }).to eq(false)
2196
3291
  end
3292
+
3293
+ it 'fails on unknown axis' do
3294
+ expect { @df.all?(:kitten) { |r| r.mean > 100 } }.to raise_error ArgumentError, /axis/
3295
+ end
2197
3296
  end
2198
3297
 
2199
3298
  context "#only_numerics" do
@@ -2372,4 +3471,199 @@ describe Daru::DataFrame do
2372
3471
  end
2373
3472
 
2374
3473
  end
3474
+
3475
+ context '#inspect' do
3476
+ subject { df.inspect }
3477
+
3478
+ context 'empty' do
3479
+ let(:df) { Daru::DataFrame.new({}, order: %w[a b c])}
3480
+ it { is_expected.to eq %Q{
3481
+ |#<Daru::DataFrame(0x3)>
3482
+ | a b c
3483
+ }.unindent}
3484
+ end
3485
+
3486
+ context 'simple' do
3487
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, name: 'test')}
3488
+ it { should == %Q{
3489
+ |#<Daru::DataFrame: test (3x3)>
3490
+ | a b c
3491
+ | 0 1 3 6
3492
+ | 1 2 4 7
3493
+ | 2 3 5 8
3494
+ }.unindent}
3495
+ end
3496
+
3497
+ context 'no name' do
3498
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]})}
3499
+ it { should == %Q{
3500
+ |#<Daru::DataFrame(3x3)>
3501
+ | a b c
3502
+ | 0 1 3 6
3503
+ | 1 2 4 7
3504
+ | 2 3 5 8
3505
+ }.unindent}
3506
+ end
3507
+
3508
+ context 'with nils' do
3509
+ let(:df) { Daru::DataFrame.new({a: [1,nil,3], b: [3,4,5], c: [6,7,nil]}, name: 'test')}
3510
+ it { is_expected.to eq %Q{
3511
+ |#<Daru::DataFrame: test (3x3)>
3512
+ | a b c
3513
+ | 0 1 3 6
3514
+ | 1 nil 4 7
3515
+ | 2 3 5 nil
3516
+ }.unindent}
3517
+ end
3518
+
3519
+ context 'very long' do
3520
+ let(:df) { Daru::DataFrame.new({a: [1,1,1]*20, b: [1,1,1]*20, c: [1,1,1]*20}, name: 'test')}
3521
+ it { is_expected.to eq %Q{
3522
+ |#<Daru::DataFrame: test (60x3)>
3523
+ | a b c
3524
+ | 0 1 1 1
3525
+ | 1 1 1 1
3526
+ | 2 1 1 1
3527
+ | 3 1 1 1
3528
+ | 4 1 1 1
3529
+ | 5 1 1 1
3530
+ | 6 1 1 1
3531
+ | 7 1 1 1
3532
+ | 8 1 1 1
3533
+ | 9 1 1 1
3534
+ | 10 1 1 1
3535
+ | 11 1 1 1
3536
+ | 12 1 1 1
3537
+ | 13 1 1 1
3538
+ | 14 1 1 1
3539
+ | ... ... ... ...
3540
+ }.unindent}
3541
+ end
3542
+
3543
+ context 'long data lines' do
3544
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [4,5,6], c: ['this is ridiculously long',nil,nil]}, name: 'test')}
3545
+ it { is_expected.to eq %Q{
3546
+ |#<Daru::DataFrame: test (3x3)>
3547
+ | a b c
3548
+ | 0 1 4 this is ri
3549
+ | 1 2 5 nil
3550
+ | 2 3 6 nil
3551
+ }.unindent}
3552
+ end
3553
+
3554
+ context 'index is a MultiIndex' do
3555
+ let(:df) {
3556
+ Daru::DataFrame.new(
3557
+ {
3558
+ a: [1,2,3,4,5,6,7],
3559
+ b: %w[a b c d e f g]
3560
+ }, index: Daru::MultiIndex.from_tuples([
3561
+ %w[foo one],
3562
+ %w[foo two],
3563
+ %w[foo three],
3564
+ %w[bar one],
3565
+ %w[bar two],
3566
+ %w[bar three],
3567
+ %w[baz one],
3568
+ ]),
3569
+ name: 'test'
3570
+ )
3571
+ }
3572
+
3573
+ it { is_expected.to eq %Q{
3574
+ |#<Daru::DataFrame: test (7x2)>
3575
+ | a b
3576
+ | foo one 1 a
3577
+ | two 2 b
3578
+ | three 3 c
3579
+ | bar one 4 d
3580
+ | two 5 e
3581
+ | three 6 f
3582
+ | baz one 7 g
3583
+ }.unindent}
3584
+ end
3585
+
3586
+ context 'vectors is a MultiIndex' do
3587
+ end
3588
+
3589
+ context 'spacing and threshold settings' do
3590
+ end
3591
+ end
3592
+
3593
+ context '#to_s' do
3594
+ it 'produces something, despite of how reasonable you think it is' do
3595
+ expect(@data_frame.to_s).to eq @data_frame.to_html
3596
+ end
3597
+ end
3598
+
3599
+ context '#to_json' do
3600
+ let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]}, index: [:one, :two, :three], name: 'test')}
3601
+ subject { JSON.parse(json) }
3602
+
3603
+ context 'with index' do
3604
+ let(:json) { df.to_json(false) }
3605
+ # FIXME: is it most reasonable we can do?.. -- zverok
3606
+ # For me, more resonable thing would be something like
3607
+ #
3608
+ # [
3609
+ # {"index" => "one" , "a"=>1, "b"=>3, "c"=>6},
3610
+ # {"index" => "two" , "a"=>2, "b"=>4, "c"=>7},
3611
+ # {"index" => "three", "a"=>3, "b"=>5, "c"=>8}
3612
+ # ]
3613
+ #
3614
+ # Or maybe
3615
+ #
3616
+ # [
3617
+ # ["one" , {"a"=>1, "b"=>3, "c"=>6}],
3618
+ # ["two" , {"a"=>2, "b"=>4, "c"=>7}],
3619
+ # ["three", {"a"=>3, "b"=>5, "c"=>8}]
3620
+ # ]
3621
+ #
3622
+ # Or even
3623
+ #
3624
+ # {
3625
+ # "one" => {"a"=>1, "b"=>3, "c"=>6},
3626
+ # "two" => {"a"=>2, "b"=>4, "c"=>7},
3627
+ # "three" => {"a"=>3, "b"=>5, "c"=>8}
3628
+ # }
3629
+ #
3630
+ it { is_expected.to eq(
3631
+ [
3632
+ [
3633
+ {"a"=>1, "b"=>3, "c"=>6},
3634
+ {"a"=>2, "b"=>4, "c"=>7},
3635
+ {"a"=>3, "b"=>5, "c"=>8}
3636
+ ],
3637
+ ["one", "two", "three"]
3638
+ ]
3639
+ )}
3640
+ end
3641
+
3642
+ context 'without index' do
3643
+ let(:json) { df.to_json(true) }
3644
+ it { is_expected.to eq(
3645
+ [
3646
+ {"a"=>1, "b"=>3, "c"=>6},
3647
+ {"a"=>2, "b"=>4, "c"=>7},
3648
+ {"a"=>3, "b"=>5, "c"=>8}
3649
+ ]
3650
+ )}
3651
+ end
3652
+ end
3653
+
3654
+ context '#create_sql' do
3655
+ let(:df) { Daru::DataFrame.new({
3656
+ a: [1,2,3],
3657
+ b: ['test', 'me', 'please'],
3658
+ c: ['2015-06-01', '2015-06-02', '2015-06-03']
3659
+ },
3660
+ name: 'test'
3661
+ )}
3662
+ subject { df.create_sql('foo') }
3663
+ it { is_expected.to eq %Q{
3664
+ |CREATE TABLE foo (a INTEGER,
3665
+ | b VARCHAR (255),
3666
+ | c DATE) CHARACTER SET=UTF8;
3667
+ }.unindent}
3668
+ end
2375
3669
  end if mri?