daru_lite 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,105 @@
1
+ describe DaruLite::Vector, '#to_html' do
2
+ [nil, :category].each do |type|
3
+ let(:doc) { Nokogiri::HTML(vector.to_html) }
4
+ subject(:table) { doc.at('table') }
5
+ let(:header) { doc.at('b') }
6
+
7
+ context 'simple' do
8
+ let(:vector) { DaruLite::Vector.new [1,nil,3],
9
+ index: [:a, :b, :c], name: 'test', type: type }
10
+ it { is_expected.not_to be_nil }
11
+
12
+ describe 'header' do
13
+ subject { header }
14
+ it { is_expected.not_to be_nil }
15
+ its(:text) { is_expected.to eq " DaruLite::Vector(3)"\
16
+ "#{":category" if type == :category} " }
17
+ end
18
+
19
+ describe 'name' do
20
+ subject(:name) { table.at('tr:nth-child(1) > th:nth-child(2)') }
21
+ it { is_expected.not_to be_nil }
22
+ its(:text) { is_expected.to eq 'test' }
23
+
24
+ context 'withought name' do
25
+ let(:vector) { DaruLite::Vector.new [1,nil,3], index: [:a, :b, :c], type: type }
26
+
27
+ it { is_expected.to be_nil }
28
+ end
29
+ end
30
+
31
+ describe 'index' do
32
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
33
+ its(:count) { is_expected.to eq vector.size }
34
+ it { is_expected.to eq vector.index.to_a.map(&:to_s) }
35
+ end
36
+
37
+ describe 'values' do
38
+ subject(:indexes) { table.search('tr > td:last-child').map(&:text) }
39
+ its(:count) { is_expected.to eq vector.size }
40
+ it { is_expected.to eq vector.to_a.map(&:to_s) }
41
+ end
42
+ end
43
+
44
+ context 'large vector' do
45
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test', type: type }
46
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
47
+ expect(table.search('tr').size).to eq 33
48
+ end
49
+
50
+ describe '"skipped" row' do
51
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
52
+ its(:count) { is_expected.to eq 2 }
53
+ it { is_expected.to eq ['...', '...'] }
54
+ end
55
+
56
+ describe 'last row' do
57
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
58
+ its(:count) { is_expected.to eq 2 }
59
+ it { is_expected.to eq ['299', '3'] }
60
+ end
61
+ end
62
+
63
+ context 'multi-index' do
64
+ subject(:vector) {
65
+ DaruLite::Vector.new(
66
+ [1,2,3,4,5,6,7],
67
+ name: 'test',
68
+ type: type,
69
+ index: DaruLite::MultiIndex.from_tuples([
70
+ %w[foo one],
71
+ %w[foo two],
72
+ %w[foo three],
73
+ %w[bar one],
74
+ %w[bar two],
75
+ %w[bar three],
76
+ %w[baz one],
77
+ ]),
78
+ )
79
+ }
80
+
81
+ describe 'header' do
82
+ subject { header }
83
+ it { is_expected.not_to be_nil }
84
+ its(:text) { is_expected.to eq " DaruLite::Vector(7)"\
85
+ "#{":category" if type == :category} " }
86
+ end
87
+
88
+ describe 'name row' do
89
+ subject(:row) { table.at('tr:nth-child(1)').search('th') }
90
+ its(:count) { should == 2 }
91
+ it { expect(row.first['colspan']).to eq '2' }
92
+ end
93
+
94
+ describe 'first data row' do
95
+ let(:row) { table.at('tbody > tr:first-child') }
96
+ subject { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
97
+ it { is_expected.to eq [
98
+ '<th rowspan="3">foo</th>',
99
+ '<th rowspan="1">one</th>',
100
+ '<td>1</td>'
101
+ ]}
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,148 @@
1
+ describe DaruLite::DataFrame do
2
+ before(:each) do
3
+ @df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
4
+ c: [10,20,30,40,50]})
5
+ @left = DaruLite::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
6
+ index: [0,4,5,3])
7
+ @right = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
8
+ index: [0,1,2,3,6])
9
+ end
10
+
11
+ context "#+" do
12
+ it "adds a number to all numeric vectors" do
13
+ expect(@df + 2).to eq(DaruLite::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
14
+ c: [12,22,32,42,52] }))
15
+ end
16
+
17
+ it "adds two dataframes to produce a third" do
18
+ expect(@left + @right).to eq(DaruLite::DataFrame.new({
19
+ a: [2,nil,nil,8,nil,nil],
20
+ b: [20,nil,nil,80,nil,nil],
21
+ c: [nil,nil,nil,nil,nil,nil]
22
+ }, index: [0,1,2,3,4,5,6]))
23
+ end
24
+ end
25
+
26
+ context "#-" do
27
+ it "subtracts a number from all numeric vectors" do
28
+ expect(@df - 2).to eq(DaruLite::DataFrame.new({
29
+ a: [-1,0,1,2,3],
30
+ b: ['a','e','i','o','u'],
31
+ c: [8,18,28,38,48]}))
32
+ end
33
+
34
+ it "subtracts a data frame from another" do
35
+ expect(@left - @right).to eq(DaruLite::DataFrame.new({
36
+ a: [0,nil,nil,0,nil,nil],
37
+ b: [0,nil,nil,0,nil,nil],
38
+ c: [nil,nil,nil,nil,nil,nil]
39
+ }, index: [0,1,2,3,4,5,6]))
40
+ end
41
+ end
42
+
43
+ context "#*" do
44
+ it "multiplies all numeric vectors by number" do
45
+ expect(@df * 2).to eq(DaruLite::DataFrame.new({a: [2,4,6,8,10], b: ['a','e','i','o','u'],
46
+ c: [20,40,60,80,100] }))
47
+ end
48
+
49
+ it "multipies two dataframes to produce a third" do
50
+ expect(@left * @right).to eq(DaruLite::DataFrame.new({
51
+ a: [1,nil,nil,16,nil,nil],
52
+ b: [100,nil,nil,1600,nil,nil],
53
+ c: [nil,nil,nil,nil,nil,nil]
54
+ }, index: [0,1,2,3,4,5,6]))
55
+ end
56
+ end
57
+
58
+ context "#/" do
59
+ it "divides all numeric vectors by number" do
60
+ expect(@df / 2.0).to eq(DaruLite::DataFrame.new({a: [0.5, 1, 1.5, 2, 2.5], b: ['a','e','i','o','u'],
61
+ c: [5,10,15,20,25] }))
62
+ end
63
+
64
+ it "multipies two dataframes to produce a third" do
65
+ # NB: this and other tests of two DF interactions are not EXTREMELY
66
+ # useful, but to know that nothing unexpected emerges here
67
+ expect(@left / @right).to eq(DaruLite::DataFrame.new({
68
+ a: [1,nil,nil,1,nil,nil],
69
+ b: [1,nil,nil,1,nil,nil],
70
+ c: [nil,nil,nil,nil,nil,nil]
71
+ }, index: [0,1,2,3,4,5,6]))
72
+ end
73
+ end
74
+
75
+ context "#%" do
76
+ it "divides all numeric vectors by number and returns reminder" do
77
+ expect(@df % 3).to eq(DaruLite::DataFrame.new({a: [1, 2, 0, 1, 2], b: ['a','e','i','o','u'],
78
+ c: [1, 2, 0, 1, 2] }))
79
+ end
80
+
81
+ it "returns reminder of per-item division" do
82
+ expect(@left % @right).to eq(DaruLite::DataFrame.new({
83
+ a: [0,nil,nil,0,nil,nil],
84
+ b: [0,nil,nil,0,nil,nil],
85
+ c: [nil,nil,nil,nil,nil,nil]
86
+ }, index: [0,1,2,3,4,5,6]))
87
+ end
88
+ end
89
+
90
+ context "#**" do
91
+ it "calculates result of each numeric value pow" do
92
+ expect(@df ** 2).to eq(DaruLite::DataFrame.new({a: [1, 4, 9, 16, 25], b: ['a','e','i','o','u'],
93
+ c: [100, 400, 900, 1600, 2500] }))
94
+ end
95
+
96
+ it "returns per-item pow" do
97
+ expect(@left ** @right).to eq(DaruLite::DataFrame.new({
98
+ a: [1,nil,nil,4**4,nil,nil],
99
+ b: [10**10,nil,nil,40**40,nil,nil],
100
+ c: [nil,nil,nil,nil,nil,nil]
101
+ }, index: [0,1,2,3,4,5,6]))
102
+ end
103
+ end
104
+
105
+ context "#sqrt" do
106
+ it "calculates sqrt" do
107
+ expect_correct_df_in_delta(@df.sqrt,
108
+ DaruLite::DataFrame.new({
109
+ a: [1.0,1.41421356,1.73205080,2.0,2.23606797],
110
+ c: [3.16227766, 4.47213595 ,5.47722557 ,6.32455532, 7.07106781]
111
+ }), 0.001
112
+ )
113
+ end
114
+ end
115
+
116
+ context "#round" do
117
+ it "rounds to precision" do
118
+ df = DaruLite::DataFrame.new({
119
+ a: [1.3434,2.4332,5.6655,12.3344,32.233],
120
+ b: [1.3434,2.4332,5.6655,12.3344,32.233],
121
+ c: %w(a b c d e)
122
+ })
123
+ ans = DaruLite::DataFrame.new({
124
+ a: [1.34,2.43,5.67,12.33,32.23],
125
+ b: [1.34,2.43,5.67,12.33,32.23],
126
+ })
127
+
128
+ expect(df.round(2)).to eq(ans)
129
+ end
130
+ end
131
+
132
+ context "#exp" do
133
+ it "calculates exponential" do
134
+ e = Math::E
135
+ df = DaruLite::DataFrame.new({
136
+ a: [1,2,3],
137
+ b: [4,5,6],
138
+ c: %w(a b c)
139
+ })
140
+ ans = DaruLite::DataFrame.new({
141
+ a: [e, e**2, e**3],
142
+ b: [e**4, e**5, e**6],
143
+ })
144
+
145
+ expect_correct_df_in_delta(df.exp, ans, 0.0001)
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,165 @@
1
+ describe DaruLite::Vector do
2
+ let(:dv1) { described_class.new(values1, name: :boozy, index: indexes1) }
3
+ let(:dv2) { described_class.new(values2, name: :mayer, index: indexes2) }
4
+ let(:with_md1) do
5
+ described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: indexes_with_md1)
6
+ end
7
+ let(:with_md2) do
8
+ described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c])
9
+ end
10
+ let(:values1) { [1, 2, 3, 4] }
11
+ let(:values2) { [1, 2, 3, 4] }
12
+ let(:indexes1) { [:bud, :kf, :henie, :corona] }
13
+ let(:indexes2) { [:obi, :wan, :kf, :corona] }
14
+ let(:indexes1_and_2) { [:bud, :corona, :henie, :kf, :obi, :wan] }
15
+ let(:indexes_with_md1) { [:a, :b, :c, :obi, :wan, :corona] }
16
+ let(:indexes_with_md1_and_2) { [:a, :b, :c, :corona, :obi, :wan] }
17
+
18
+ describe "#+" do
19
+ it "adds matching indexes of the other vector" do
20
+ expect(dv1 + dv2).to eq(
21
+ DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: indexes1_and_2)
22
+ )
23
+ end
24
+
25
+ it "adds number to each element of the entire vector" do
26
+ expect(dv1 + 5).to eq(DaruLite::Vector.new(values1.map { |v| v + 5 }, name: :boozy, index: indexes1))
27
+ end
28
+
29
+ it "does not add when a number is being added" do
30
+ expect(with_md1 + 1).to eq(
31
+ DaruLite::Vector.new([2, 3, 4, nil, 6, nil], name: :missing, index: indexes_with_md1)
32
+ )
33
+ end
34
+
35
+ it "puts a nil when one of the operands is nil" do
36
+ expect(with_md1 + with_md2).to eq(
37
+ DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
38
+ )
39
+ end
40
+
41
+ context 'when vectors have numeric and non-numeric indexes' do
42
+ let(:indexes1) { nil }
43
+ let(:indexes2) { [:a, :b, :c, :d] }
44
+
45
+ it "appropriately adds vectors with numeric and non-numeric indexes" do
46
+ expect(dv1 + dv2).to eq(DaruLite::Vector.new(Array.new(6), index: [0, 1, 2, 3] + indexes2))
47
+ end
48
+ end
49
+
50
+ context 'when index contains symbols and strings' do
51
+ let(:indexes1) { [:bud, 'kf', :henie, :corona] }
52
+ let(:indexes2) { [:obi, :wan, 'kf', :corona] }
53
+
54
+ it "adds matching indexes of the other vector" do
55
+ expect(dv1 + dv2).to eq(
56
+ DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: [:bud, :corona, :henie, 'kf', :obi, :wan])
57
+ )
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "#-" do
63
+ it "subtracts matching indexes of the other vector" do
64
+ expect(dv1 - dv2).to eq(
65
+ DaruLite::Vector.new([nil, 0, nil, -1, nil, nil], name: :boozy, index: indexes1_and_2)
66
+ )
67
+ end
68
+
69
+ it "subtracts number from each element of the entire vector" do
70
+ expect(dv1 - 5).to eq(DaruLite::Vector.new(values1.map { |v| v - 5 }, name: :boozy, index: indexes1))
71
+ end
72
+ end
73
+
74
+ describe "#*" do
75
+ it "multiplies matching indexes of the other vector" do
76
+ expect(dv1 * dv2).to eq(
77
+ DaruLite::Vector.new([nil, 16, nil, 6, nil, nil], name: :boozy, index: indexes1_and_2)
78
+ )
79
+ end
80
+
81
+ it "multiplies number to each element of the entire vector" do
82
+ expect(dv1 * 5).to eq(DaruLite::Vector.new(values1.map { |v| v * 5 }, name: :boozy, index: indexes1))
83
+ end
84
+ end
85
+
86
+ describe "#\/" do
87
+ let(:values2) { [1.0, 2.0, 3.0, 4.0] }
88
+
89
+ it "divides matching indexes of the other vector" do
90
+ expect(dv1 / dv2).to eq(
91
+ DaruLite::Vector.new([nil, 1.0, nil, 2 / 3.to_f, nil, nil], name: :boozy, index: indexes1_and_2)
92
+ )
93
+ end
94
+
95
+ it "divides number from each element of the entire vector" do
96
+ expect(dv1 / 5.0).to eq(DaruLite::Vector.new(values1.map { |v| v / 5.0 }, name: :boozy, index: indexes1))
97
+ end
98
+ end
99
+
100
+ describe "#%" do
101
+ it "applies % to matching indexes of the other vector" do
102
+ expect(dv1 % dv2).to eq(DaruLite::Vector.new([nil, 0, nil, 2, nil, nil], name: :boozy, index: indexes1_and_2))
103
+ end
104
+
105
+ it "applies % for each element of the entire vector" do
106
+ expect(dv1 % 5).to eq(
107
+ DaruLite::Vector.new(values1.map { |v| v % 5 }, name: :boozy, index: indexes1)
108
+ )
109
+ end
110
+ end
111
+
112
+ describe "#**" do
113
+ it "applies ** to matching indexes of the other vector" do
114
+ expect(dv1 ** dv2).to eq(DaruLite::Vector.new([nil, 256, nil, 8, nil, nil], name: :boozy, index: indexes1_and_2))
115
+ end
116
+
117
+ it "applies ** for each element of the entire vector" do
118
+ expect(dv1 ** 5).to eq(DaruLite::Vector.new(values1.map { |v| v ** 5 }, name: :boozy, index: indexes1))
119
+ end
120
+ end
121
+
122
+ describe "#exp" do
123
+ it "calculates exp of all numbers" do
124
+ expect(with_md1.exp.round(3)).to eq(
125
+ DaruLite::Vector.new(
126
+ [2.718281828459045, 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil],
127
+ index: indexes_with_md1,
128
+ name: :missing
129
+ ).round(3)
130
+ )
131
+ end
132
+ end
133
+
134
+ describe "#add" do
135
+ it "adds two vectors with nils as 0 if skipnil is true" do
136
+ expect(with_md1.add(with_md2, skipnil: true)).to eq(
137
+ DaruLite::Vector.new([1, 7, 3, 3, 1, 7], name: :missing, index: indexes_with_md1_and_2)
138
+ )
139
+ end
140
+
141
+ it "adds two vectors same as :+ if skipnil is false" do
142
+ expect(with_md1.add(with_md2, skipnil: false)).to eq(
143
+ DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
144
+ )
145
+ end
146
+ end
147
+
148
+ describe "#abs" do
149
+ it "calculates abs value" do
150
+ with_md1.abs
151
+ end
152
+ end
153
+
154
+ describe "#sqrt" do
155
+ it "calculates sqrt" do
156
+ with_md1.sqrt
157
+ end
158
+ end
159
+
160
+ describe "#round" do
161
+ it "rounds to given precision" do
162
+ with_md1.round(2)
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,178 @@
1
+ describe DaruLite::DataFrame do
2
+ before do
3
+ @df = DaruLite::DataFrame.new({
4
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
5
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
6
+ c: ['small','large','large','small','small','large','small','large','small'],
7
+ d: [1,2,2,3,3,4,5,6,7],
8
+ e: [2,4,4,6,6,8,10,12,14],
9
+ f: [10,20,20,30,30,40,50,60,70]
10
+ })
11
+ end
12
+
13
+ context "#mean" do
14
+ it "calculates mean of single level numeric only vectors and returns values in a Vector" do
15
+ expect(@df.mean.round(2)).to eq(DaruLite::Vector.new([3.67, 7.33, 36.67],
16
+ index: [:d, :e, :f]
17
+ ))
18
+ end
19
+
20
+ it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
21
+ # TODO - pending
22
+ end
23
+ end
24
+
25
+ context "#variance_sample" do
26
+ it "calculates variance of single level numeric only vectors and returns values in a Vector" do
27
+ expect(@df.variance_sample).to eq(DaruLite::Vector.new([4.0, 16.0, 400.0], index: [:d, :e, :f]))
28
+ end
29
+ end
30
+
31
+ context "#std" do
32
+ it "calculates standard deviation of single level numeric only vectors and returns values in a Vector" do
33
+ expect(@df.std).to eq(DaruLite::Vector.new([2, 4, 20], index: [:d, :e, :f]))
34
+ end
35
+ end
36
+
37
+ context "#sum" do
38
+ it "calculates sum of single level numeric only vectors and returns values in a Vector" do
39
+ expect(@df.sum).to eq(DaruLite::Vector.new([33, 66, 330], index: [:d, :e, :f]))
40
+ end
41
+ end
42
+
43
+ context "#count" do
44
+ it "counts number of non-nil single level numeric only vectors and returns values in a Vector" do
45
+ expect(@df.count).to eq(DaruLite::Vector.new([9, 9, 9], index: [:d, :e, :f]))
46
+ end
47
+ end
48
+
49
+ context "#mode" do
50
+ it "calculates mode of single level numeric only vectors and returns values in a Vector" do
51
+ expect(@df.mode).to eq(DaruLite::Vector.new([DaruLite::Vector.new([2,3]), DaruLite::Vector.new([4,6]), DaruLite::Vector.new([20,30])], index: [:d, :e, :f]))
52
+ end
53
+ end
54
+
55
+ context "#median" do
56
+ it "calculates median of single level numeric only vectors and returns values in a Vector" do
57
+ expect(@df.median).to eq(DaruLite::Vector.new([3, 6, 30], index: [:d, :e, :f]))
58
+ end
59
+ end
60
+
61
+ context "#max" do
62
+ it "returns the row that has max" do
63
+ df = DaruLite::DataFrame.new({
64
+ a: [1,2,3,4,5],
65
+ b: ['aa','aaa','a','','dfffdf'],
66
+ c: [11,22,33,44,55]
67
+ })
68
+ expect(df.max(vector: :b)).to eq(
69
+ DaruLite::Vector.new([5,'dfffdf',55], index: [:a, :b, :c]))
70
+ end
71
+ end
72
+
73
+ context "#min" do
74
+ it "calculates mininum of single level numeric only vectors and returns values in a Vector" do
75
+ expect(@df.min).to eq(DaruLite::Vector.new([1, 2, 10], index: [:d, :e, :f]))
76
+ end
77
+ end
78
+
79
+ context "#range" do
80
+ it "calculates range of single level numeric only vectors and returns values in a Vector" do
81
+ expect(@df.range).to eq(DaruLite::Vector.new([6, 12, 60], index: [:d, :e, :f]))
82
+ end
83
+ end
84
+
85
+ context "#product" do
86
+ it "calculates product of single level numeric only vectors and returns values in a Vector" do
87
+ expect(@df.product).to eq(DaruLite::Vector.new([30240, 15482880, 30240000000000], index: [:d, :e, :f]))
88
+ end
89
+ end
90
+
91
+ context "#describe" do
92
+ it "generates mean, std, max, min and count of numeric vectors in one shot" do
93
+ expect(@df.describe.round(2)).to eq(DaruLite::DataFrame.new({
94
+ d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
95
+ e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
96
+ f: [9.00, 36.67,20.00,10.00, 70.00]
97
+ }, index: [:count, :mean, :std, :min, :max]
98
+ ))
99
+ end
100
+ end
101
+
102
+ context "percent_change" do
103
+ it "calculates percent change of numeric vectors" do
104
+ expect(@df.percent_change.round(2)).to eq(DaruLite::DataFrame.new({
105
+ d: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
106
+ e: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
107
+ f: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17] }
108
+ ))
109
+ end
110
+ end
111
+
112
+ context "#cov" do
113
+ it "calculates the variance covariance of the numeric vectors of DataFrame" do
114
+ expect(@df.cov).to eq(DaruLite::DataFrame.new({
115
+ d: [4,8,40],
116
+ e: [8,16,80],
117
+ f: [40,80,400]
118
+ }, index: [:d, :e, :f]
119
+ ))
120
+
121
+ test = DaruLite::DataFrame.rows([
122
+ [0.3543,0.4535,0.2424],
123
+ [0.123,0.53323,0.544],
124
+ [0.4345,0.4552,0.425]
125
+ ], order: [:a, :b, :c])
126
+ ans = DaruLite::DataFrame.new({
127
+ a: [0.0261607, -0.0071019, -0.0153640],
128
+ b: [-0.0071019, 0.0020747, 0.0056071],
129
+ c: [-0.0153640, 0.0056071, 0.0230777]
130
+ })
131
+
132
+ test.cov.each_vector_with_index do |v, i|
133
+ expect_correct_vector_in_delta v, ans[i], 0.01
134
+ end
135
+ end
136
+ end
137
+
138
+ context "#corr" do
139
+ it "calculates the correlation between the numeric vectors of DataFrame" do
140
+ expect(@df.corr).to eq(DaruLite::DataFrame.new({
141
+ d: [1,1,1],
142
+ e: [1,1,1],
143
+ f: [1,1,1]
144
+ }, index: [:d, :e, :f]
145
+ ))
146
+ end
147
+ end
148
+
149
+ context "#cumsum" do
150
+ it "calculates cumulative sum of numeric vectors" do
151
+ answer = DaruLite::DataFrame.new({
152
+ d: [1,3,5,8,11,15,20,26,33],
153
+ e: [2,6,10,16,22,30,40,52,66],
154
+ f: [10,30,50,80,110,150,200,260,330]
155
+ })
156
+ expect(@df.cumsum).to eq(answer)
157
+ end
158
+ end
159
+
160
+ context "#rolling_mean" do
161
+ it "calculates rolling mean" do
162
+ v = DaruLite::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
163
+ df = DaruLite::DataFrame.new({ a: v, b: v, c: v })
164
+ answer = df.rolling_mean
165
+
166
+ expect(answer[:a][-1]) .to be_within(0.001).of(16.897)
167
+ expect(answer[:b][-5]) .to be_within(0.001).of(17.233)
168
+ expect(answer[:c][-10]).to be_within(0.001).of(17.587)
169
+ end
170
+ end
171
+
172
+ context "#standardize" do
173
+ it "standardizes" do
174
+ # TODO: Write this test.
175
+ @df.standardize
176
+ end
177
+ end
178
+ end