daru 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
@@ -1,10 +1,14 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- ALL_DTYPES = [:array, :nmatrix]
5
-
6
4
  ALL_DTYPES.each do |dtype|
7
- describe dtype do
5
+ describe dtype.to_s do
6
+ before do
7
+ @common_all_dtypes = Daru::Vector.new(
8
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
9
+ dtype: dtype, name: :common_all_dtypes)
10
+ end
11
+
8
12
  context "#initialize" do
9
13
  before do
10
14
  @tuples = [
@@ -64,9 +68,9 @@ describe Daru::Vector do
64
68
  end
65
69
 
66
70
  it "inserts nils for extra indices" do
67
- dv = Daru::Vector.new [1,2,3], name: :yoga, index: [0,1,2,3,4], dtype: :array
71
+ dv = Daru::Vector.new [1,2,3], name: :yoga, index: [0,1,2,3,4], dtype: dtype
68
72
 
69
- expect(dv).to eq([1,2,3,nil,nil].dv(:yoga,nil, Array))
73
+ expect(dv).to eq([1,2,3,nil,nil].dv(:yoga,nil, :array))
70
74
  end
71
75
 
72
76
  it "inserts nils for extra indices (MultiIndex)" do
@@ -75,6 +79,40 @@ describe Daru::Vector do
75
79
  end
76
80
  end
77
81
 
82
+ context ".new_with_size" do
83
+ it "creates new vector from only size" do
84
+ v1 = Daru::Vector.new 10.times.map { nil }, dtype: dtype
85
+ v2 = Daru::Vector.new_with_size 10, dtype: dtype
86
+ expect(v2).to eq(v1)
87
+ end if [:array, :nmatrix].include?(dtype)
88
+
89
+ it "creates new vector from only size and value" do
90
+ a = rand
91
+ v1 = Daru::Vector.new 10.times.map { a }, dtype: dtype
92
+ v2 = Daru::Vector.new_with_size(10, value: a, dtype: dtype)
93
+ expect(v2).to eq(v1)
94
+ end
95
+
96
+ it "accepts block" do
97
+ v1 = Daru::Vector.new 10.times.map {|i| i * 2 }
98
+ v2 = Daru::Vector.new_with_size(10, dtype: dtype) { |i| i * 2 }
99
+ expect(v2).to eq(v1)
100
+ end
101
+ end
102
+
103
+ context ".[]" do
104
+ it "returns same results as R-c()" do
105
+ reference = Daru::Vector.new([0, 4, 5, 6, 10])
106
+ expect(Daru::Vector[0, 4, 5, 6, 10]) .to eq(reference)
107
+ expect(Daru::Vector[0, 4..6, 10]) .to eq(reference)
108
+ expect(Daru::Vector[[0], [4, 5, 6], [10]]) .to eq(reference)
109
+ expect(Daru::Vector[[0], [4, [5, [6]]], [10]]).to eq(reference)
110
+
111
+ expect(Daru::Vector[[0], Daru::Vector.new([4, 5, 6]), [10]])
112
+ .to eq(reference)
113
+ end
114
+ end
115
+
78
116
  context "#[]" do
79
117
  context Daru::Index do
80
118
  before :each do
@@ -107,7 +145,7 @@ describe Daru::Vector do
107
145
 
108
146
  it "returns a vector when specified numeric Range" do
109
147
  expect(@dv[3..4]).to eq(Daru::Vector.new([4,5], name: :yoga,
110
- index: [:padme, :r2d2], name: :yoga, dtype: dtype))
148
+ index: [:padme, :r2d2], dtype: dtype))
111
149
  end
112
150
  end
113
151
 
@@ -302,14 +340,14 @@ describe Daru::Vector do
302
340
  @dv.delete_at :one
303
341
 
304
342
  expect(@dv).to eq(Daru::Vector.new [2,3,4,5], name: :a,
305
- index: [:two, :three, :four, :five]), dtype: dtype
343
+ index: [:two, :three, :four, :five], dtype: dtype)
306
344
  end
307
345
 
308
346
  it "deletes element of specified integer index" do
309
347
  @dv.delete_at 2
310
348
 
311
349
  expect(@dv).to eq(Daru::Vector.new [1,2,4,5], name: :a,
312
- index: [:one, :two, :four, :five]), dtype: dtype
350
+ index: [:one, :two, :four, :five], dtype: dtype)
313
351
  end
314
352
  end
315
353
 
@@ -318,6 +356,17 @@ describe Daru::Vector do
318
356
  end
319
357
  end
320
358
 
359
+ context "#delete_if" do
360
+ it "deletes elements if block evaluates to true" do
361
+ v = Daru::Vector.new [1,22,33,45,65,32,524,656,123,99,77], dtype: dtype
362
+ ret = v.delete_if { |d| d % 11 != 0 }
363
+ expect(ret).to eq(
364
+ Daru::Vector.new([1,45,65,32,524,656,123],
365
+ index: [0,3,4,5,6,7,8], dtype: dtype))
366
+ expect(ret.dtype).to eq(dtype)
367
+ end
368
+ end
369
+
321
370
  context "#index_of" do
322
371
  context Daru::Index do
323
372
  it "returns index of specified value" do
@@ -494,10 +543,160 @@ describe Daru::Vector do
494
543
  pending
495
544
  end
496
545
  end
546
+
547
+ context "#collect" do
548
+ it "returns an Array" do
549
+ a = @common_all_dtypes.collect { |v| v }
550
+ expect(a).to eq([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99])
551
+ end
552
+ end
553
+
554
+ context "#map" do
555
+ it "maps" do
556
+ a = @common_all_dtypes.map { |v| v }
557
+ expect(a).to eq([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99])
558
+ end
559
+ end
560
+
561
+ context "#map!" do
562
+ it "destructively maps" do
563
+ @common_all_dtypes.map! { |v| v + 1 }
564
+ expect(@common_all_dtypes).to eq(Daru::Vector.new(
565
+ [6, 6, 6, 6, 6, 7, 7, 8, 9, 10, 11, 2, 3, 4, 5, 12, -98, -98],
566
+ dtype: dtype))
567
+ end
568
+ end
569
+
570
+ context "#recode" do
571
+ it "maps and returns a vector of dtype of self by default" do
572
+ a = @common_all_dtypes.recode { |v| v == -99 ? 1 : 0 }
573
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
574
+ expect(a).to eq(exp)
575
+ expect(a.dtype).to eq(:array)
576
+ end
577
+
578
+ it "maps and returns a vector of dtype gsl" do
579
+ a = @common_all_dtypes.recode(:gsl) { |v| v == -99 ? 1 : 0 }
580
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :gsl
581
+ expect(a).to eq(exp)
582
+ expect(a.dtype).to eq(:gsl)
583
+ end
584
+
585
+ it "maps and returns a vector of dtype nmatrix" do
586
+ a = @common_all_dtypes.recode(:nmatrix) { |v| v == -99 ? 1 : 0 }
587
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :nmatrix
588
+ expect(a).to eq(exp)
589
+ expect(a.dtype).to eq(:nmatrix)
590
+ end
591
+ end
592
+
593
+ context "#recode!" do
594
+ before :each do
595
+ @vector = Daru::Vector.new(
596
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
597
+ dtype: dtype, name: :common_all_dtypes)
598
+ end
599
+
600
+ it "destructively maps and returns a vector of dtype of self by default" do
601
+ @vector.recode! { |v| v == -99 ? 1 : 0 }
602
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
603
+ expect(@vector).to eq(exp)
604
+ expect(@vector.dtype).to eq(dtype)
605
+ end
606
+
607
+ it "destructively maps and returns a vector of dtype gsl" do
608
+ @vector.recode!(:gsl) { |v| v == -99 ? 1 : 0 }
609
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :gsl
610
+ expect(@vector).to eq(exp)
611
+ expect(@vector.dtype).to eq(exp.dtype)
612
+ end
613
+
614
+ it "destructively maps and returns a vector of dtype nmatrix" do
615
+ @vector.recode!(:nmatrix) { |v| v == -99 ? 1 : 0 }
616
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :nmatrix
617
+ expect(@vector).to eq(exp)
618
+ expect(@vector.dtype).to eq(exp.dtype)
619
+ end
620
+ end
621
+
622
+ context "#verify" do
623
+ it "returns a hash of invalid data and index of data" do
624
+ v = Daru::Vector.new [1,2,3,4,5,6,-99,35,-100], dtype: dtype
625
+ h = v.verify { |d| d > 0 }
626
+ e = { 6 => -99, 8 => -100 }
627
+ expect(h).to eq(e)
628
+ end
629
+ end
630
+
631
+ context "#summary" do
632
+ it "has name in the summary" do
633
+ expect(@common_all_dtypes.summary.match("#{@common_all_dtypes.name}")).to_not eq(nil)
634
+ end
635
+ end
636
+
637
+ context "#bootstrap" do
638
+ it "returns a vector with mean=mu and sd=se" do
639
+ rng = Distribution::Normal.rng(0, 1)
640
+ vector =Daru::Vector.new_with_size(100, dtype: dtype) { rng.call}
641
+
642
+ df = vector.bootstrap([:mean, :sd], 200)
643
+ se = 1 / Math.sqrt(vector.size)
644
+ expect(df[:mean].mean).to be_within(0.3).of(0)
645
+ expect(df[:mean].sd).to be_within(0.02).of(se)
646
+ end
647
+ end
497
648
  end
498
- end # checking with ALL_DTYPES
649
+ end # describe ALL_DTYPES.each
499
650
 
651
+ # -----------------------------------------------------------------------
500
652
  # works with arrays only
653
+
654
+ context "#splitted" do
655
+ it "splits correctly" do
656
+ a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 'd', 10, nil]
657
+ expect(a.splitted).to eq([%w(a), %w(a b), %w(c d), %w(a d), %w(d), [10], nil])
658
+ end
659
+ end
660
+
661
+ context "#missing_values" do
662
+ before do
663
+ @common = Daru::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
664
+ end
665
+
666
+ it "allows setting the value to be treated as missing" do
667
+ @common.missing_values = [10]
668
+ expect(@common.only_valid.to_a.sort).to eq(
669
+ [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9]
670
+ )
671
+ expect(@common.to_a).to eq(
672
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
673
+ )
674
+
675
+ @common.missing_values = [-99]
676
+ expect(@common.only_valid.to_a.sort).to eq(
677
+ [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10]
678
+ )
679
+ expect(@common.to_a).to eq(
680
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
681
+ )
682
+
683
+ @common.missing_values = []
684
+ expect(@common.only_valid.to_a.sort).to eq(
685
+ [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10]
686
+ )
687
+ expect(@common.to_a).to eq(
688
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
689
+ )
690
+ end
691
+
692
+ it "responds to has_missing_data? with explicit missing_values" do
693
+ a = Daru::Vector.new [1,2,3,4,10]
694
+ a.missing_values = [10]
695
+
696
+ expect(a.has_missing_data?).to eq(true)
697
+ end
698
+ end
699
+
501
700
  context "#is_nil?" do
502
701
  before(:each) do
503
702
  @with_md = Daru::Vector.new([1,2,nil,3,4,nil])
@@ -523,19 +722,19 @@ describe Daru::Vector do
523
722
  end
524
723
  end
525
724
 
526
- context "#nil_positions" do
725
+ context "#missing_positions" do
527
726
  context Daru::Index do
528
727
  before(:each) do
529
728
  @with_md = Daru::Vector.new([1,2,nil,3,4,nil])
530
729
  end
531
730
 
532
731
  it "returns the indexes of nils" do
533
- expect(@with_md.nil_positions).to eq([2,5])
732
+ expect(@with_md.missing_positions).to eq([2,5])
534
733
  end
535
734
 
536
735
  it "updates after assingment" do
537
736
  @with_md[3] = nil
538
- expect(@with_md.nil_positions).to eq([2,3,5])
737
+ expect(@with_md.missing_positions).to eq([2,3,5])
539
738
  end
540
739
  end
541
740
 
@@ -617,4 +816,161 @@ describe Daru::Vector do
617
816
  expect(@vector.to_matrix(:vertical)).to eq(Matrix.columns([[1,2,3,4,5,6]]))
618
817
  end
619
818
  end
819
+
820
+ context "#only_valid" do
821
+ it "returns a Vector of only non-nil data" do
822
+ vector = Daru::Vector.new [1,2,3,4,nil,3,nil],
823
+ index: [:a, :b, :c, :d, :e, :f, :g]
824
+ expect(vector.only_valid).to eq(Daru::Vector.new([1,2,3,4,3],
825
+ index: [:a, :b, :c, :d, :f]))
826
+ end
827
+ end
828
+
829
+ context "#only_numerics" do
830
+ it "returns only numerical or missing data" do
831
+ v = Daru::Vector.new([1,2,nil,3,4,'s','a',nil])
832
+ expect(v.only_numerics).to eq(Daru::Vector.new([1,2,nil,3,4,nil],
833
+ index: [0,1,2,3,4,7]))
834
+ end
835
+ end
836
+
837
+ context "#to_gsl" do
838
+ it "returns a GSL::Vector of non-nil data" do
839
+ vector = Daru::Vector.new [1,2,3,4,nil,6,nil]
840
+ expect(vector.to_gsl).to eq(GSL::Vector.alloc(1,2,3,4,6))
841
+
842
+ gsl_vec = Daru::Vector.new [1,2,3,4,5], dtype: :gsl
843
+ expect(gsl_vec.to_gsl).to eq(GSL::Vector.alloc(1,2,3,4,5))
844
+ end
845
+ end
846
+
847
+ context "#split_by_separator" do
848
+ def expect_correct_tokens hash
849
+ expect(hash['a'].to_a).to eq([1, 1, 0, 1, 0, nil])
850
+ expect(hash['b'].to_a).to eq([0, 1, 0, 0, 0, nil])
851
+ expect(hash['c'].to_a).to eq([0, 0, 1, 0, 0, nil])
852
+ expect(hash['d'].to_a).to eq([0, 0, 1, 1, 0, nil])
853
+ expect(hash[10].to_a).to eq([0, 0, 0, 0, 1, nil])
854
+ end
855
+
856
+ before do
857
+ @a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 10, nil]
858
+ @b = @a.split_by_separator(',')
859
+ end
860
+
861
+ it "returns a Hash" do
862
+ expect(@b.class).to eq(Hash)
863
+ end
864
+
865
+ it "returned Hash has keys with with different values of @a" do
866
+ expect(@b.keys).to eq(['a', 'b', 'c', 'd', 10])
867
+ end
868
+
869
+ it "returns a Hash, whose values are Daru::Vector" do
870
+ @b.each_key do |key|
871
+ expect(@b[key].class).to eq(Daru::Vector)
872
+ end
873
+ end
874
+
875
+ it "ensures that hash values are n times the tokens appears" do
876
+ expect_correct_tokens @b
877
+ end
878
+
879
+ it "gives the same values using a different separator" do
880
+ a = Daru::Vector.new ['a', 'a*b', 'c*d', 'a*d', 10, nil]
881
+ b = a.split_by_separator '*'
882
+ expect_correct_tokens b
883
+ end
884
+ end
885
+
886
+ context "#split_by_separator_freq" do
887
+ it "returns the number of ocurrences of tokens" do
888
+ a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 10, nil]
889
+ expect(a.split_by_separator_freq).to eq(
890
+ { 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 })
891
+ end
892
+ end
893
+
894
+ context "#n_valid" do
895
+ it "returns number of non-missing positions" do
896
+ v = Daru::Vector.new [1,2,3,4,nil,nil,3,5]
897
+ expect(v.n_valid).to eq(6)
898
+ end
899
+ end
900
+
901
+ context "#reset_index!" do
902
+ it "resets any index to a numerical serialized index" do
903
+ v = Daru::Vector.new([1,2,3,4,5,nil,nil,4,nil])
904
+ r = v.only_valid.reset_index!
905
+ expect(r).to eq(Daru::Vector.new([1,2,3,4,5,4]))
906
+ expect(r.index).to eq(Daru::Index.new([0,1,2,3,4,5]))
907
+
908
+ indexed = Daru::Vector.new([1,2,3,4,5], index: [:a, :b, :c, :d, :e])
909
+ expect(indexed.reset_index!.index).to eq(Daru::Index.new([0,1,2,3,4]))
910
+ end
911
+ end
912
+
913
+ context "#rename" do
914
+ before :each do
915
+ @v = Daru::Vector.new [1,2,3,4,5,5], name: :this_vector
916
+ end
917
+
918
+ it "assings name" do
919
+ @v.rename :that_vector
920
+ expect(@v.name).to eq(:that_vector)
921
+ end
922
+
923
+ it "stores name as a symbol" do
924
+ @v.rename "This is a vector"
925
+ expect(@v.name).to eq(:"This is a vector")
926
+ end
927
+ end
928
+
929
+ context "#any?" do
930
+ before do
931
+ @v = Daru::Vector.new([1,2,3,4,5])
932
+ end
933
+
934
+ it "returns true if block returns true for any one of the elements" do
935
+ expect(@v.any?{ |e| e == 1 }).to eq(true)
936
+ end
937
+
938
+ it "returns false if block is false for all elements" do
939
+ expect(@v.any?{ |e| e > 10 }).to eq(false)
940
+ end
941
+ end
942
+
943
+ context "#all?" do
944
+ before do
945
+ @v = Daru::Vector.new([1,2,3,4,5])
946
+ end
947
+
948
+ it "returns true if block is true for all elements" do
949
+ expect(@v.all? { |e| e < 6 }).to eq(true)
950
+ end
951
+
952
+ it "returns false if block is false for any one element" do
953
+ expect(@v.all? { |e| e == 2 }).to eq(false)
954
+ end
955
+ end
956
+
957
+ context "#only_missing" do
958
+ it "returns a vector (with proper index) of all the elements marked 'missing'" do
959
+ v = Daru::Vector.new([1,2,3,4,5,6,4,5,5,4,4,nil,nil,nil])
960
+ v.missing_values = [nil, 5]
961
+
962
+ expect(v.only_missing).to eq(Daru::Vector.new([5,5,5,nil,nil,nil],
963
+ index: [4,7,8,11,12,13]))
964
+ end
965
+ end
966
+
967
+ context "#detach_index" do
968
+ it "creates a DataFrame with first Vector as index and second as values of the Vector" do
969
+ v = Daru::Vector.new([1,2,3,4,5,6], index: [:a, :b, :c, :d, :e, :f], name: :values)
970
+ expect(v.detach_index).to eq(Daru::DataFrame.new({
971
+ index: ['a', 'b', 'c', 'd', 'e', 'f'],
972
+ vector: [1,2,3,4,5,6]
973
+ }))
974
+ end
975
+ end
620
976
  end if mri?