daru 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +14 -0
  3. data/.travis.yml +26 -4
  4. data/CONTRIBUTING.md +31 -0
  5. data/Gemfile +1 -2
  6. data/{History.txt → History.md} +110 -44
  7. data/README.md +21 -288
  8. data/Rakefile +1 -0
  9. data/daru.gemspec +12 -8
  10. data/lib/daru.rb +36 -1
  11. data/lib/daru/accessors/array_wrapper.rb +8 -3
  12. data/lib/daru/accessors/gsl_wrapper.rb +113 -0
  13. data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
  14. data/lib/daru/core/group_by.rb +0 -1
  15. data/lib/daru/dataframe.rb +1192 -83
  16. data/lib/daru/extensions/rserve.rb +21 -0
  17. data/lib/daru/index.rb +14 -0
  18. data/lib/daru/io/io.rb +170 -8
  19. data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
  20. data/lib/daru/maths/arithmetic/vector.rb +4 -4
  21. data/lib/daru/maths/statistics/dataframe.rb +48 -27
  22. data/lib/daru/maths/statistics/vector.rb +215 -33
  23. data/lib/daru/monkeys.rb +53 -7
  24. data/lib/daru/multi_index.rb +21 -4
  25. data/lib/daru/plotting/dataframe.rb +83 -25
  26. data/lib/daru/plotting/vector.rb +9 -10
  27. data/lib/daru/vector.rb +596 -61
  28. data/lib/daru/version.rb +3 -0
  29. data/spec/accessors/wrappers_spec.rb +51 -0
  30. data/spec/core/group_by_spec.rb +0 -2
  31. data/spec/daru_spec.rb +58 -0
  32. data/spec/dataframe_spec.rb +768 -73
  33. data/spec/extensions/rserve_spec.rb +52 -0
  34. data/spec/fixtures/bank2.dat +200 -0
  35. data/spec/fixtures/repeated_fields.csv +7 -0
  36. data/spec/fixtures/scientific_notation.csv +4 -0
  37. data/spec/fixtures/test_xls.xls +0 -0
  38. data/spec/io/io_spec.rb +161 -24
  39. data/spec/math/arithmetic/dataframe_spec.rb +26 -7
  40. data/spec/math/arithmetic/vector_spec.rb +8 -0
  41. data/spec/math/statistics/dataframe_spec.rb +16 -1
  42. data/spec/math/statistics/vector_spec.rb +215 -47
  43. data/spec/spec_helper.rb +21 -2
  44. data/spec/vector_spec.rb +368 -12
  45. metadata +99 -16
  46. data/lib/version.rb +0 -3
  47. data/notebooks/grouping_splitting_pivots.ipynb +0 -529
  48. data/notebooks/intro_with_music_data_.ipynb +0 -303
@@ -1,10 +1,14 @@
1
1
  require 'spec_helper.rb'
2
2
 
3
3
  describe Daru::Vector do
4
- ALL_DTYPES = [:array, :nmatrix]
5
-
6
4
  ALL_DTYPES.each do |dtype|
7
- describe dtype do
5
+ describe dtype.to_s do
6
+ before do
7
+ @common_all_dtypes = Daru::Vector.new(
8
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
9
+ dtype: dtype, name: :common_all_dtypes)
10
+ end
11
+
8
12
  context "#initialize" do
9
13
  before do
10
14
  @tuples = [
@@ -64,9 +68,9 @@ describe Daru::Vector do
64
68
  end
65
69
 
66
70
  it "inserts nils for extra indices" do
67
- dv = Daru::Vector.new [1,2,3], name: :yoga, index: [0,1,2,3,4], dtype: :array
71
+ dv = Daru::Vector.new [1,2,3], name: :yoga, index: [0,1,2,3,4], dtype: dtype
68
72
 
69
- expect(dv).to eq([1,2,3,nil,nil].dv(:yoga,nil, Array))
73
+ expect(dv).to eq([1,2,3,nil,nil].dv(:yoga,nil, :array))
70
74
  end
71
75
 
72
76
  it "inserts nils for extra indices (MultiIndex)" do
@@ -75,6 +79,40 @@ describe Daru::Vector do
75
79
  end
76
80
  end
77
81
 
82
+ context ".new_with_size" do
83
+ it "creates new vector from only size" do
84
+ v1 = Daru::Vector.new 10.times.map { nil }, dtype: dtype
85
+ v2 = Daru::Vector.new_with_size 10, dtype: dtype
86
+ expect(v2).to eq(v1)
87
+ end if [:array, :nmatrix].include?(dtype)
88
+
89
+ it "creates new vector from only size and value" do
90
+ a = rand
91
+ v1 = Daru::Vector.new 10.times.map { a }, dtype: dtype
92
+ v2 = Daru::Vector.new_with_size(10, value: a, dtype: dtype)
93
+ expect(v2).to eq(v1)
94
+ end
95
+
96
+ it "accepts block" do
97
+ v1 = Daru::Vector.new 10.times.map {|i| i * 2 }
98
+ v2 = Daru::Vector.new_with_size(10, dtype: dtype) { |i| i * 2 }
99
+ expect(v2).to eq(v1)
100
+ end
101
+ end
102
+
103
+ context ".[]" do
104
+ it "returns same results as R-c()" do
105
+ reference = Daru::Vector.new([0, 4, 5, 6, 10])
106
+ expect(Daru::Vector[0, 4, 5, 6, 10]) .to eq(reference)
107
+ expect(Daru::Vector[0, 4..6, 10]) .to eq(reference)
108
+ expect(Daru::Vector[[0], [4, 5, 6], [10]]) .to eq(reference)
109
+ expect(Daru::Vector[[0], [4, [5, [6]]], [10]]).to eq(reference)
110
+
111
+ expect(Daru::Vector[[0], Daru::Vector.new([4, 5, 6]), [10]])
112
+ .to eq(reference)
113
+ end
114
+ end
115
+
78
116
  context "#[]" do
79
117
  context Daru::Index do
80
118
  before :each do
@@ -107,7 +145,7 @@ describe Daru::Vector do
107
145
 
108
146
  it "returns a vector when specified numeric Range" do
109
147
  expect(@dv[3..4]).to eq(Daru::Vector.new([4,5], name: :yoga,
110
- index: [:padme, :r2d2], name: :yoga, dtype: dtype))
148
+ index: [:padme, :r2d2], dtype: dtype))
111
149
  end
112
150
  end
113
151
 
@@ -302,14 +340,14 @@ describe Daru::Vector do
302
340
  @dv.delete_at :one
303
341
 
304
342
  expect(@dv).to eq(Daru::Vector.new [2,3,4,5], name: :a,
305
- index: [:two, :three, :four, :five]), dtype: dtype
343
+ index: [:two, :three, :four, :five], dtype: dtype)
306
344
  end
307
345
 
308
346
  it "deletes element of specified integer index" do
309
347
  @dv.delete_at 2
310
348
 
311
349
  expect(@dv).to eq(Daru::Vector.new [1,2,4,5], name: :a,
312
- index: [:one, :two, :four, :five]), dtype: dtype
350
+ index: [:one, :two, :four, :five], dtype: dtype)
313
351
  end
314
352
  end
315
353
 
@@ -318,6 +356,17 @@ describe Daru::Vector do
318
356
  end
319
357
  end
320
358
 
359
+ context "#delete_if" do
360
+ it "deletes elements if block evaluates to true" do
361
+ v = Daru::Vector.new [1,22,33,45,65,32,524,656,123,99,77], dtype: dtype
362
+ ret = v.delete_if { |d| d % 11 != 0 }
363
+ expect(ret).to eq(
364
+ Daru::Vector.new([1,45,65,32,524,656,123],
365
+ index: [0,3,4,5,6,7,8], dtype: dtype))
366
+ expect(ret.dtype).to eq(dtype)
367
+ end
368
+ end
369
+
321
370
  context "#index_of" do
322
371
  context Daru::Index do
323
372
  it "returns index of specified value" do
@@ -494,10 +543,160 @@ describe Daru::Vector do
494
543
  pending
495
544
  end
496
545
  end
546
+
547
+ context "#collect" do
548
+ it "returns an Array" do
549
+ a = @common_all_dtypes.collect { |v| v }
550
+ expect(a).to eq([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99])
551
+ end
552
+ end
553
+
554
+ context "#map" do
555
+ it "maps" do
556
+ a = @common_all_dtypes.map { |v| v }
557
+ expect(a).to eq([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99])
558
+ end
559
+ end
560
+
561
+ context "#map!" do
562
+ it "destructively maps" do
563
+ @common_all_dtypes.map! { |v| v + 1 }
564
+ expect(@common_all_dtypes).to eq(Daru::Vector.new(
565
+ [6, 6, 6, 6, 6, 7, 7, 8, 9, 10, 11, 2, 3, 4, 5, 12, -98, -98],
566
+ dtype: dtype))
567
+ end
568
+ end
569
+
570
+ context "#recode" do
571
+ it "maps and returns a vector of dtype of self by default" do
572
+ a = @common_all_dtypes.recode { |v| v == -99 ? 1 : 0 }
573
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
574
+ expect(a).to eq(exp)
575
+ expect(a.dtype).to eq(:array)
576
+ end
577
+
578
+ it "maps and returns a vector of dtype gsl" do
579
+ a = @common_all_dtypes.recode(:gsl) { |v| v == -99 ? 1 : 0 }
580
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :gsl
581
+ expect(a).to eq(exp)
582
+ expect(a.dtype).to eq(:gsl)
583
+ end
584
+
585
+ it "maps and returns a vector of dtype nmatrix" do
586
+ a = @common_all_dtypes.recode(:nmatrix) { |v| v == -99 ? 1 : 0 }
587
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :nmatrix
588
+ expect(a).to eq(exp)
589
+ expect(a.dtype).to eq(:nmatrix)
590
+ end
591
+ end
592
+
593
+ context "#recode!" do
594
+ before :each do
595
+ @vector = Daru::Vector.new(
596
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, 11, -99, -99],
597
+ dtype: dtype, name: :common_all_dtypes)
598
+ end
599
+
600
+ it "destructively maps and returns a vector of dtype of self by default" do
601
+ @vector.recode! { |v| v == -99 ? 1 : 0 }
602
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
603
+ expect(@vector).to eq(exp)
604
+ expect(@vector.dtype).to eq(dtype)
605
+ end
606
+
607
+ it "destructively maps and returns a vector of dtype gsl" do
608
+ @vector.recode!(:gsl) { |v| v == -99 ? 1 : 0 }
609
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :gsl
610
+ expect(@vector).to eq(exp)
611
+ expect(@vector.dtype).to eq(exp.dtype)
612
+ end
613
+
614
+ it "destructively maps and returns a vector of dtype nmatrix" do
615
+ @vector.recode!(:nmatrix) { |v| v == -99 ? 1 : 0 }
616
+ exp = Daru::Vector.new [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype: :nmatrix
617
+ expect(@vector).to eq(exp)
618
+ expect(@vector.dtype).to eq(exp.dtype)
619
+ end
620
+ end
621
+
622
+ context "#verify" do
623
+ it "returns a hash of invalid data and index of data" do
624
+ v = Daru::Vector.new [1,2,3,4,5,6,-99,35,-100], dtype: dtype
625
+ h = v.verify { |d| d > 0 }
626
+ e = { 6 => -99, 8 => -100 }
627
+ expect(h).to eq(e)
628
+ end
629
+ end
630
+
631
+ context "#summary" do
632
+ it "has name in the summary" do
633
+ expect(@common_all_dtypes.summary.match("#{@common_all_dtypes.name}")).to_not eq(nil)
634
+ end
635
+ end
636
+
637
+ context "#bootstrap" do
638
+ it "returns a vector with mean=mu and sd=se" do
639
+ rng = Distribution::Normal.rng(0, 1)
640
+ vector =Daru::Vector.new_with_size(100, dtype: dtype) { rng.call}
641
+
642
+ df = vector.bootstrap([:mean, :sd], 200)
643
+ se = 1 / Math.sqrt(vector.size)
644
+ expect(df[:mean].mean).to be_within(0.3).of(0)
645
+ expect(df[:mean].sd).to be_within(0.02).of(se)
646
+ end
647
+ end
497
648
  end
498
- end # checking with ALL_DTYPES
649
+ end # describe ALL_DTYPES.each
499
650
 
651
+ # -----------------------------------------------------------------------
500
652
  # works with arrays only
653
+
654
+ context "#splitted" do
655
+ it "splits correctly" do
656
+ a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 'd', 10, nil]
657
+ expect(a.splitted).to eq([%w(a), %w(a b), %w(c d), %w(a d), %w(d), [10], nil])
658
+ end
659
+ end
660
+
661
+ context "#missing_values" do
662
+ before do
663
+ @common = Daru::Vector.new([5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99])
664
+ end
665
+
666
+ it "allows setting the value to be treated as missing" do
667
+ @common.missing_values = [10]
668
+ expect(@common.only_valid.to_a.sort).to eq(
669
+ [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9]
670
+ )
671
+ expect(@common.to_a).to eq(
672
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
673
+ )
674
+
675
+ @common.missing_values = [-99]
676
+ expect(@common.only_valid.to_a.sort).to eq(
677
+ [1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10]
678
+ )
679
+ expect(@common.to_a).to eq(
680
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
681
+ )
682
+
683
+ @common.missing_values = []
684
+ expect(@common.only_valid.to_a.sort).to eq(
685
+ [-99, -99, 1, 2, 3, 4, 5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10]
686
+ )
687
+ expect(@common.to_a).to eq(
688
+ [5, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 1, 2, 3, 4, nil, -99, -99]
689
+ )
690
+ end
691
+
692
+ it "responds to has_missing_data? with explicit missing_values" do
693
+ a = Daru::Vector.new [1,2,3,4,10]
694
+ a.missing_values = [10]
695
+
696
+ expect(a.has_missing_data?).to eq(true)
697
+ end
698
+ end
699
+
501
700
  context "#is_nil?" do
502
701
  before(:each) do
503
702
  @with_md = Daru::Vector.new([1,2,nil,3,4,nil])
@@ -523,19 +722,19 @@ describe Daru::Vector do
523
722
  end
524
723
  end
525
724
 
526
- context "#nil_positions" do
725
+ context "#missing_positions" do
527
726
  context Daru::Index do
528
727
  before(:each) do
529
728
  @with_md = Daru::Vector.new([1,2,nil,3,4,nil])
530
729
  end
531
730
 
532
731
  it "returns the indexes of nils" do
533
- expect(@with_md.nil_positions).to eq([2,5])
732
+ expect(@with_md.missing_positions).to eq([2,5])
534
733
  end
535
734
 
536
735
  it "updates after assingment" do
537
736
  @with_md[3] = nil
538
- expect(@with_md.nil_positions).to eq([2,3,5])
737
+ expect(@with_md.missing_positions).to eq([2,3,5])
539
738
  end
540
739
  end
541
740
 
@@ -617,4 +816,161 @@ describe Daru::Vector do
617
816
  expect(@vector.to_matrix(:vertical)).to eq(Matrix.columns([[1,2,3,4,5,6]]))
618
817
  end
619
818
  end
819
+
820
+ context "#only_valid" do
821
+ it "returns a Vector of only non-nil data" do
822
+ vector = Daru::Vector.new [1,2,3,4,nil,3,nil],
823
+ index: [:a, :b, :c, :d, :e, :f, :g]
824
+ expect(vector.only_valid).to eq(Daru::Vector.new([1,2,3,4,3],
825
+ index: [:a, :b, :c, :d, :f]))
826
+ end
827
+ end
828
+
829
+ context "#only_numerics" do
830
+ it "returns only numerical or missing data" do
831
+ v = Daru::Vector.new([1,2,nil,3,4,'s','a',nil])
832
+ expect(v.only_numerics).to eq(Daru::Vector.new([1,2,nil,3,4,nil],
833
+ index: [0,1,2,3,4,7]))
834
+ end
835
+ end
836
+
837
+ context "#to_gsl" do
838
+ it "returns a GSL::Vector of non-nil data" do
839
+ vector = Daru::Vector.new [1,2,3,4,nil,6,nil]
840
+ expect(vector.to_gsl).to eq(GSL::Vector.alloc(1,2,3,4,6))
841
+
842
+ gsl_vec = Daru::Vector.new [1,2,3,4,5], dtype: :gsl
843
+ expect(gsl_vec.to_gsl).to eq(GSL::Vector.alloc(1,2,3,4,5))
844
+ end
845
+ end
846
+
847
+ context "#split_by_separator" do
848
+ def expect_correct_tokens hash
849
+ expect(hash['a'].to_a).to eq([1, 1, 0, 1, 0, nil])
850
+ expect(hash['b'].to_a).to eq([0, 1, 0, 0, 0, nil])
851
+ expect(hash['c'].to_a).to eq([0, 0, 1, 0, 0, nil])
852
+ expect(hash['d'].to_a).to eq([0, 0, 1, 1, 0, nil])
853
+ expect(hash[10].to_a).to eq([0, 0, 0, 0, 1, nil])
854
+ end
855
+
856
+ before do
857
+ @a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 10, nil]
858
+ @b = @a.split_by_separator(',')
859
+ end
860
+
861
+ it "returns a Hash" do
862
+ expect(@b.class).to eq(Hash)
863
+ end
864
+
865
+ it "returned Hash has keys with with different values of @a" do
866
+ expect(@b.keys).to eq(['a', 'b', 'c', 'd', 10])
867
+ end
868
+
869
+ it "returns a Hash, whose values are Daru::Vector" do
870
+ @b.each_key do |key|
871
+ expect(@b[key].class).to eq(Daru::Vector)
872
+ end
873
+ end
874
+
875
+ it "ensures that hash values are n times the tokens appears" do
876
+ expect_correct_tokens @b
877
+ end
878
+
879
+ it "gives the same values using a different separator" do
880
+ a = Daru::Vector.new ['a', 'a*b', 'c*d', 'a*d', 10, nil]
881
+ b = a.split_by_separator '*'
882
+ expect_correct_tokens b
883
+ end
884
+ end
885
+
886
+ context "#split_by_separator_freq" do
887
+ it "returns the number of ocurrences of tokens" do
888
+ a = Daru::Vector.new ['a', 'a,b', 'c,d', 'a,d', 10, nil]
889
+ expect(a.split_by_separator_freq).to eq(
890
+ { 'a' => 3, 'b' => 1, 'c' => 1, 'd' => 2, 10 => 1 })
891
+ end
892
+ end
893
+
894
+ context "#n_valid" do
895
+ it "returns number of non-missing positions" do
896
+ v = Daru::Vector.new [1,2,3,4,nil,nil,3,5]
897
+ expect(v.n_valid).to eq(6)
898
+ end
899
+ end
900
+
901
+ context "#reset_index!" do
902
+ it "resets any index to a numerical serialized index" do
903
+ v = Daru::Vector.new([1,2,3,4,5,nil,nil,4,nil])
904
+ r = v.only_valid.reset_index!
905
+ expect(r).to eq(Daru::Vector.new([1,2,3,4,5,4]))
906
+ expect(r.index).to eq(Daru::Index.new([0,1,2,3,4,5]))
907
+
908
+ indexed = Daru::Vector.new([1,2,3,4,5], index: [:a, :b, :c, :d, :e])
909
+ expect(indexed.reset_index!.index).to eq(Daru::Index.new([0,1,2,3,4]))
910
+ end
911
+ end
912
+
913
+ context "#rename" do
914
+ before :each do
915
+ @v = Daru::Vector.new [1,2,3,4,5,5], name: :this_vector
916
+ end
917
+
918
+ it "assings name" do
919
+ @v.rename :that_vector
920
+ expect(@v.name).to eq(:that_vector)
921
+ end
922
+
923
+ it "stores name as a symbol" do
924
+ @v.rename "This is a vector"
925
+ expect(@v.name).to eq(:"This is a vector")
926
+ end
927
+ end
928
+
929
+ context "#any?" do
930
+ before do
931
+ @v = Daru::Vector.new([1,2,3,4,5])
932
+ end
933
+
934
+ it "returns true if block returns true for any one of the elements" do
935
+ expect(@v.any?{ |e| e == 1 }).to eq(true)
936
+ end
937
+
938
+ it "returns false if block is false for all elements" do
939
+ expect(@v.any?{ |e| e > 10 }).to eq(false)
940
+ end
941
+ end
942
+
943
+ context "#all?" do
944
+ before do
945
+ @v = Daru::Vector.new([1,2,3,4,5])
946
+ end
947
+
948
+ it "returns true if block is true for all elements" do
949
+ expect(@v.all? { |e| e < 6 }).to eq(true)
950
+ end
951
+
952
+ it "returns false if block is false for any one element" do
953
+ expect(@v.all? { |e| e == 2 }).to eq(false)
954
+ end
955
+ end
956
+
957
+ context "#only_missing" do
958
+ it "returns a vector (with proper index) of all the elements marked 'missing'" do
959
+ v = Daru::Vector.new([1,2,3,4,5,6,4,5,5,4,4,nil,nil,nil])
960
+ v.missing_values = [nil, 5]
961
+
962
+ expect(v.only_missing).to eq(Daru::Vector.new([5,5,5,nil,nil,nil],
963
+ index: [4,7,8,11,12,13]))
964
+ end
965
+ end
966
+
967
+ context "#detach_index" do
968
+ it "creates a DataFrame with first Vector as index and second as values of the Vector" do
969
+ v = Daru::Vector.new([1,2,3,4,5,6], index: [:a, :b, :c, :d, :e, :f], name: :values)
970
+ expect(v.detach_index).to eq(Daru::DataFrame.new({
971
+ index: ['a', 'b', 'c', 'd', 'e', 'f'],
972
+ vector: [1,2,3,4,5,6]
973
+ }))
974
+ end
975
+ end
620
976
  end if mri?