daru_lite 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149) hide show
  1. checksums.yaml +7 -0
  2. data/.github/ISSUE_TEMPLATE.md +18 -0
  3. data/.github/workflows/ci.yml +33 -0
  4. data/.gitignore +10 -0
  5. data/.rspec +2 -0
  6. data/.rubocop.yml +27 -0
  7. data/.rubocop_todo.yml +137 -0
  8. data/CONTRIBUTING.md +47 -0
  9. data/Gemfile +2 -0
  10. data/History.md +4 -0
  11. data/LICENSE +24 -0
  12. data/README.md +218 -0
  13. data/Rakefile +69 -0
  14. data/ReleasePolicy.md +20 -0
  15. data/benchmarks/TradeoffData.csv +65 -0
  16. data/benchmarks/csv_reading.rb +22 -0
  17. data/benchmarks/dataframe_creation.rb +39 -0
  18. data/benchmarks/db_loading.rb +34 -0
  19. data/benchmarks/duplicating.rb +45 -0
  20. data/benchmarks/group_by.rb +32 -0
  21. data/benchmarks/joining.rb +52 -0
  22. data/benchmarks/row_access.rb +41 -0
  23. data/benchmarks/row_assign.rb +36 -0
  24. data/benchmarks/sorting.rb +51 -0
  25. data/benchmarks/statistics.rb +28 -0
  26. data/benchmarks/vector_access.rb +31 -0
  27. data/benchmarks/vector_assign.rb +42 -0
  28. data/benchmarks/where_clause.rb +48 -0
  29. data/benchmarks/where_vs_filter.rb +28 -0
  30. data/daru_lite.gemspec +55 -0
  31. data/images/README.md +5 -0
  32. data/images/con0.png +0 -0
  33. data/images/con1.png +0 -0
  34. data/images/init0.png +0 -0
  35. data/images/init1.png +0 -0
  36. data/images/man0.png +0 -0
  37. data/images/man1.png +0 -0
  38. data/images/man2.png +0 -0
  39. data/images/man3.png +0 -0
  40. data/images/man4.png +0 -0
  41. data/images/man5.png +0 -0
  42. data/images/man6.png +0 -0
  43. data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
  44. data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
  45. data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
  46. data/lib/daru_lite/category.rb +929 -0
  47. data/lib/daru_lite/configuration.rb +34 -0
  48. data/lib/daru_lite/core/group_by.rb +403 -0
  49. data/lib/daru_lite/core/merge.rb +270 -0
  50. data/lib/daru_lite/core/query.rb +109 -0
  51. data/lib/daru_lite/dataframe.rb +3080 -0
  52. data/lib/daru_lite/date_time/index.rb +569 -0
  53. data/lib/daru_lite/date_time/offsets.rb +397 -0
  54. data/lib/daru_lite/exceptions.rb +2 -0
  55. data/lib/daru_lite/extensions/which_dsl.rb +53 -0
  56. data/lib/daru_lite/formatters/table.rb +52 -0
  57. data/lib/daru_lite/helpers/array.rb +53 -0
  58. data/lib/daru_lite/index/categorical_index.rb +201 -0
  59. data/lib/daru_lite/index/index.rb +374 -0
  60. data/lib/daru_lite/index/multi_index.rb +374 -0
  61. data/lib/daru_lite/io/csv/converters.rb +21 -0
  62. data/lib/daru_lite/io/io.rb +294 -0
  63. data/lib/daru_lite/io/sql_data_source.rb +97 -0
  64. data/lib/daru_lite/iruby/helpers.rb +38 -0
  65. data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
  66. data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
  67. data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
  68. data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
  69. data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
  70. data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
  71. data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
  72. data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
  73. data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
  74. data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
  75. data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
  76. data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
  77. data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
  78. data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
  79. data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
  80. data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
  81. data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
  82. data/lib/daru_lite/monkeys.rb +56 -0
  83. data/lib/daru_lite/vector.rb +1678 -0
  84. data/lib/daru_lite/version.rb +3 -0
  85. data/lib/daru_lite.rb +99 -0
  86. data/profile/_base.rb +23 -0
  87. data/profile/df_to_a.rb +10 -0
  88. data/profile/filter.rb +13 -0
  89. data/profile/joining.rb +13 -0
  90. data/profile/sorting.rb +12 -0
  91. data/profile/vector_each_with_index.rb +9 -0
  92. data/profile/vector_new.rb +9 -0
  93. data/spec/accessors/array_wrapper_spec.rb +3 -0
  94. data/spec/category_spec.rb +1741 -0
  95. data/spec/core/group_by_spec.rb +655 -0
  96. data/spec/core/merge_spec.rb +179 -0
  97. data/spec/core/query_spec.rb +347 -0
  98. data/spec/daru_lite_spec.rb +22 -0
  99. data/spec/dataframe_spec.rb +4330 -0
  100. data/spec/date_time/data_spec.rb +197 -0
  101. data/spec/date_time/date_time_index_helper_spec.rb +72 -0
  102. data/spec/date_time/index_spec.rb +588 -0
  103. data/spec/date_time/offsets_spec.rb +465 -0
  104. data/spec/extensions/which_dsl_spec.rb +38 -0
  105. data/spec/fixtures/bank2.dat +200 -0
  106. data/spec/fixtures/boolean_converter_test.csv +5 -0
  107. data/spec/fixtures/countries.json +7794 -0
  108. data/spec/fixtures/duplicates.csv +32 -0
  109. data/spec/fixtures/eciresults.html +394 -0
  110. data/spec/fixtures/empties.dat +2 -0
  111. data/spec/fixtures/empty_rows_test.csv +17 -0
  112. data/spec/fixtures/macau.html +3691 -0
  113. data/spec/fixtures/macd_data.csv +150 -0
  114. data/spec/fixtures/matrix_test.csv +100 -0
  115. data/spec/fixtures/moneycontrol.html +6812 -0
  116. data/spec/fixtures/music_data.tsv +2501 -0
  117. data/spec/fixtures/repeated_fields.csv +7 -0
  118. data/spec/fixtures/sales-funnel.csv +18 -0
  119. data/spec/fixtures/scientific_notation.csv +4 -0
  120. data/spec/fixtures/string_converter_test.csv +5 -0
  121. data/spec/fixtures/strings.dat +2 -0
  122. data/spec/fixtures/test_xls.xls +0 -0
  123. data/spec/fixtures/test_xls_2.xls +0 -0
  124. data/spec/fixtures/url_test.txt~ +0 -0
  125. data/spec/fixtures/valid_markup.html +62 -0
  126. data/spec/fixtures/wiki_climate.html +1243 -0
  127. data/spec/fixtures/wiki_table_info.html +631 -0
  128. data/spec/formatters/table_formatter_spec.rb +137 -0
  129. data/spec/helpers_spec.rb +8 -0
  130. data/spec/index/categorical_index_spec.rb +170 -0
  131. data/spec/index/index_spec.rb +417 -0
  132. data/spec/index/multi_index_spec.rb +680 -0
  133. data/spec/io/io_spec.rb +373 -0
  134. data/spec/io/sql_data_source_spec.rb +56 -0
  135. data/spec/iruby/dataframe_spec.rb +170 -0
  136. data/spec/iruby/helpers_spec.rb +49 -0
  137. data/spec/iruby/multi_index_spec.rb +37 -0
  138. data/spec/iruby/vector_spec.rb +105 -0
  139. data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
  140. data/spec/maths/arithmetic/vector_spec.rb +165 -0
  141. data/spec/maths/statistics/dataframe_spec.rb +178 -0
  142. data/spec/maths/statistics/vector_spec.rb +756 -0
  143. data/spec/monkeys_spec.rb +42 -0
  144. data/spec/shared/vector_display_spec.rb +213 -0
  145. data/spec/spec_helper.rb +87 -0
  146. data/spec/support/database_helper.rb +30 -0
  147. data/spec/support/matchers.rb +5 -0
  148. data/spec/vector_spec.rb +2293 -0
  149. metadata +571 -0
@@ -0,0 +1,105 @@
1
+ describe DaruLite::Vector, '#to_html' do
2
+ [nil, :category].each do |type|
3
+ let(:doc) { Nokogiri::HTML(vector.to_html) }
4
+ subject(:table) { doc.at('table') }
5
+ let(:header) { doc.at('b') }
6
+
7
+ context 'simple' do
8
+ let(:vector) { DaruLite::Vector.new [1,nil,3],
9
+ index: [:a, :b, :c], name: 'test', type: type }
10
+ it { is_expected.not_to be_nil }
11
+
12
+ describe 'header' do
13
+ subject { header }
14
+ it { is_expected.not_to be_nil }
15
+ its(:text) { is_expected.to eq " DaruLite::Vector(3)"\
16
+ "#{":category" if type == :category} " }
17
+ end
18
+
19
+ describe 'name' do
20
+ subject(:name) { table.at('tr:nth-child(1) > th:nth-child(2)') }
21
+ it { is_expected.not_to be_nil }
22
+ its(:text) { is_expected.to eq 'test' }
23
+
24
+ context 'withought name' do
25
+ let(:vector) { DaruLite::Vector.new [1,nil,3], index: [:a, :b, :c], type: type }
26
+
27
+ it { is_expected.to be_nil }
28
+ end
29
+ end
30
+
31
+ describe 'index' do
32
+ subject(:indexes) { table.search('tr > td:first-child').map(&:text) }
33
+ its(:count) { is_expected.to eq vector.size }
34
+ it { is_expected.to eq vector.index.to_a.map(&:to_s) }
35
+ end
36
+
37
+ describe 'values' do
38
+ subject(:indexes) { table.search('tr > td:last-child').map(&:text) }
39
+ its(:count) { is_expected.to eq vector.size }
40
+ it { is_expected.to eq vector.to_a.map(&:to_s) }
41
+ end
42
+ end
43
+
44
+ context 'large vector' do
45
+ subject(:vector) { DaruLite::Vector.new [1,2,3] * 100, name: 'test', type: type }
46
+ it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
47
+ expect(table.search('tr').size).to eq 33
48
+ end
49
+
50
+ describe '"skipped" row' do
51
+ subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }
52
+ its(:count) { is_expected.to eq 2 }
53
+ it { is_expected.to eq ['...', '...'] }
54
+ end
55
+
56
+ describe 'last row' do
57
+ subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }
58
+ its(:count) { is_expected.to eq 2 }
59
+ it { is_expected.to eq ['299', '3'] }
60
+ end
61
+ end
62
+
63
+ context 'multi-index' do
64
+ subject(:vector) {
65
+ DaruLite::Vector.new(
66
+ [1,2,3,4,5,6,7],
67
+ name: 'test',
68
+ type: type,
69
+ index: DaruLite::MultiIndex.from_tuples([
70
+ %w[foo one],
71
+ %w[foo two],
72
+ %w[foo three],
73
+ %w[bar one],
74
+ %w[bar two],
75
+ %w[bar three],
76
+ %w[baz one],
77
+ ]),
78
+ )
79
+ }
80
+
81
+ describe 'header' do
82
+ subject { header }
83
+ it { is_expected.not_to be_nil }
84
+ its(:text) { is_expected.to eq " DaruLite::Vector(7)"\
85
+ "#{":category" if type == :category} " }
86
+ end
87
+
88
+ describe 'name row' do
89
+ subject(:row) { table.at('tr:nth-child(1)').search('th') }
90
+ its(:count) { should == 2 }
91
+ it { expect(row.first['colspan']).to eq '2' }
92
+ end
93
+
94
+ describe 'first data row' do
95
+ let(:row) { table.at('tbody > tr:first-child') }
96
+ subject { row.inner_html.scan(/<t[dh].+?<\/t[dh]>/) }
97
+ it { is_expected.to eq [
98
+ '<th rowspan="3">foo</th>',
99
+ '<th rowspan="1">one</th>',
100
+ '<td>1</td>'
101
+ ]}
102
+ end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,148 @@
1
+ describe DaruLite::DataFrame do
2
+ before(:each) do
3
+ @df = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: ['a','e','i','o','u'],
4
+ c: [10,20,30,40,50]})
5
+ @left = DaruLite::DataFrame.new({a: [1,nil,nil,4], b: [10,nil,nil,40], c: [5,6,7,8]},
6
+ index: [0,4,5,3])
7
+ @right = DaruLite::DataFrame.new({a: [1,2,3,4,5], b: [10,20,30,40,50]},
8
+ index: [0,1,2,3,6])
9
+ end
10
+
11
+ context "#+" do
12
+ it "adds a number to all numeric vectors" do
13
+ expect(@df + 2).to eq(DaruLite::DataFrame.new({a: [3,4,5,6,7], b: ['a','e','i','o','u'],
14
+ c: [12,22,32,42,52] }))
15
+ end
16
+
17
+ it "adds two dataframes to produce a third" do
18
+ expect(@left + @right).to eq(DaruLite::DataFrame.new({
19
+ a: [2,nil,nil,8,nil,nil],
20
+ b: [20,nil,nil,80,nil,nil],
21
+ c: [nil,nil,nil,nil,nil,nil]
22
+ }, index: [0,1,2,3,4,5,6]))
23
+ end
24
+ end
25
+
26
+ context "#-" do
27
+ it "subtracts a number from all numeric vectors" do
28
+ expect(@df - 2).to eq(DaruLite::DataFrame.new({
29
+ a: [-1,0,1,2,3],
30
+ b: ['a','e','i','o','u'],
31
+ c: [8,18,28,38,48]}))
32
+ end
33
+
34
+ it "subtracts a data frame from another" do
35
+ expect(@left - @right).to eq(DaruLite::DataFrame.new({
36
+ a: [0,nil,nil,0,nil,nil],
37
+ b: [0,nil,nil,0,nil,nil],
38
+ c: [nil,nil,nil,nil,nil,nil]
39
+ }, index: [0,1,2,3,4,5,6]))
40
+ end
41
+ end
42
+
43
+ context "#*" do
44
+ it "multiplies all numeric vectors by number" do
45
+ expect(@df * 2).to eq(DaruLite::DataFrame.new({a: [2,4,6,8,10], b: ['a','e','i','o','u'],
46
+ c: [20,40,60,80,100] }))
47
+ end
48
+
49
+ it "multipies two dataframes to produce a third" do
50
+ expect(@left * @right).to eq(DaruLite::DataFrame.new({
51
+ a: [1,nil,nil,16,nil,nil],
52
+ b: [100,nil,nil,1600,nil,nil],
53
+ c: [nil,nil,nil,nil,nil,nil]
54
+ }, index: [0,1,2,3,4,5,6]))
55
+ end
56
+ end
57
+
58
+ context "#/" do
59
+ it "divides all numeric vectors by number" do
60
+ expect(@df / 2.0).to eq(DaruLite::DataFrame.new({a: [0.5, 1, 1.5, 2, 2.5], b: ['a','e','i','o','u'],
61
+ c: [5,10,15,20,25] }))
62
+ end
63
+
64
+ it "multipies two dataframes to produce a third" do
65
+ # NB: this and other tests of two DF interactions are not EXTREMELY
66
+ # useful, but to know that nothing unexpected emerges here
67
+ expect(@left / @right).to eq(DaruLite::DataFrame.new({
68
+ a: [1,nil,nil,1,nil,nil],
69
+ b: [1,nil,nil,1,nil,nil],
70
+ c: [nil,nil,nil,nil,nil,nil]
71
+ }, index: [0,1,2,3,4,5,6]))
72
+ end
73
+ end
74
+
75
+ context "#%" do
76
+ it "divides all numeric vectors by number and returns reminder" do
77
+ expect(@df % 3).to eq(DaruLite::DataFrame.new({a: [1, 2, 0, 1, 2], b: ['a','e','i','o','u'],
78
+ c: [1, 2, 0, 1, 2] }))
79
+ end
80
+
81
+ it "returns reminder of per-item division" do
82
+ expect(@left % @right).to eq(DaruLite::DataFrame.new({
83
+ a: [0,nil,nil,0,nil,nil],
84
+ b: [0,nil,nil,0,nil,nil],
85
+ c: [nil,nil,nil,nil,nil,nil]
86
+ }, index: [0,1,2,3,4,5,6]))
87
+ end
88
+ end
89
+
90
+ context "#**" do
91
+ it "calculates result of each numeric value pow" do
92
+ expect(@df ** 2).to eq(DaruLite::DataFrame.new({a: [1, 4, 9, 16, 25], b: ['a','e','i','o','u'],
93
+ c: [100, 400, 900, 1600, 2500] }))
94
+ end
95
+
96
+ it "returns per-item pow" do
97
+ expect(@left ** @right).to eq(DaruLite::DataFrame.new({
98
+ a: [1,nil,nil,4**4,nil,nil],
99
+ b: [10**10,nil,nil,40**40,nil,nil],
100
+ c: [nil,nil,nil,nil,nil,nil]
101
+ }, index: [0,1,2,3,4,5,6]))
102
+ end
103
+ end
104
+
105
+ context "#sqrt" do
106
+ it "calculates sqrt" do
107
+ expect_correct_df_in_delta(@df.sqrt,
108
+ DaruLite::DataFrame.new({
109
+ a: [1.0,1.41421356,1.73205080,2.0,2.23606797],
110
+ c: [3.16227766, 4.47213595 ,5.47722557 ,6.32455532, 7.07106781]
111
+ }), 0.001
112
+ )
113
+ end
114
+ end
115
+
116
+ context "#round" do
117
+ it "rounds to precision" do
118
+ df = DaruLite::DataFrame.new({
119
+ a: [1.3434,2.4332,5.6655,12.3344,32.233],
120
+ b: [1.3434,2.4332,5.6655,12.3344,32.233],
121
+ c: %w(a b c d e)
122
+ })
123
+ ans = DaruLite::DataFrame.new({
124
+ a: [1.34,2.43,5.67,12.33,32.23],
125
+ b: [1.34,2.43,5.67,12.33,32.23],
126
+ })
127
+
128
+ expect(df.round(2)).to eq(ans)
129
+ end
130
+ end
131
+
132
+ context "#exp" do
133
+ it "calculates exponential" do
134
+ e = Math::E
135
+ df = DaruLite::DataFrame.new({
136
+ a: [1,2,3],
137
+ b: [4,5,6],
138
+ c: %w(a b c)
139
+ })
140
+ ans = DaruLite::DataFrame.new({
141
+ a: [e, e**2, e**3],
142
+ b: [e**4, e**5, e**6],
143
+ })
144
+
145
+ expect_correct_df_in_delta(df.exp, ans, 0.0001)
146
+ end
147
+ end
148
+ end
@@ -0,0 +1,165 @@
1
+ describe DaruLite::Vector do
2
+ let(:dv1) { described_class.new(values1, name: :boozy, index: indexes1) }
3
+ let(:dv2) { described_class.new(values2, name: :mayer, index: indexes2) }
4
+ let(:with_md1) do
5
+ described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: indexes_with_md1)
6
+ end
7
+ let(:with_md2) do
8
+ described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c])
9
+ end
10
+ let(:values1) { [1, 2, 3, 4] }
11
+ let(:values2) { [1, 2, 3, 4] }
12
+ let(:indexes1) { [:bud, :kf, :henie, :corona] }
13
+ let(:indexes2) { [:obi, :wan, :kf, :corona] }
14
+ let(:indexes1_and_2) { [:bud, :corona, :henie, :kf, :obi, :wan] }
15
+ let(:indexes_with_md1) { [:a, :b, :c, :obi, :wan, :corona] }
16
+ let(:indexes_with_md1_and_2) { [:a, :b, :c, :corona, :obi, :wan] }
17
+
18
+ describe "#+" do
19
+ it "adds matching indexes of the other vector" do
20
+ expect(dv1 + dv2).to eq(
21
+ DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: indexes1_and_2)
22
+ )
23
+ end
24
+
25
+ it "adds number to each element of the entire vector" do
26
+ expect(dv1 + 5).to eq(DaruLite::Vector.new(values1.map { |v| v + 5 }, name: :boozy, index: indexes1))
27
+ end
28
+
29
+ it "does not add when a number is being added" do
30
+ expect(with_md1 + 1).to eq(
31
+ DaruLite::Vector.new([2, 3, 4, nil, 6, nil], name: :missing, index: indexes_with_md1)
32
+ )
33
+ end
34
+
35
+ it "puts a nil when one of the operands is nil" do
36
+ expect(with_md1 + with_md2).to eq(
37
+ DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
38
+ )
39
+ end
40
+
41
+ context 'when vectors have numeric and non-numeric indexes' do
42
+ let(:indexes1) { nil }
43
+ let(:indexes2) { [:a, :b, :c, :d] }
44
+
45
+ it "appropriately adds vectors with numeric and non-numeric indexes" do
46
+ expect(dv1 + dv2).to eq(DaruLite::Vector.new(Array.new(6), index: [0, 1, 2, 3] + indexes2))
47
+ end
48
+ end
49
+
50
+ context 'when index contains symbols and strings' do
51
+ let(:indexes1) { [:bud, 'kf', :henie, :corona] }
52
+ let(:indexes2) { [:obi, :wan, 'kf', :corona] }
53
+
54
+ it "adds matching indexes of the other vector" do
55
+ expect(dv1 + dv2).to eq(
56
+ DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: [:bud, :corona, :henie, 'kf', :obi, :wan])
57
+ )
58
+ end
59
+ end
60
+ end
61
+
62
+ describe "#-" do
63
+ it "subtracts matching indexes of the other vector" do
64
+ expect(dv1 - dv2).to eq(
65
+ DaruLite::Vector.new([nil, 0, nil, -1, nil, nil], name: :boozy, index: indexes1_and_2)
66
+ )
67
+ end
68
+
69
+ it "subtracts number from each element of the entire vector" do
70
+ expect(dv1 - 5).to eq(DaruLite::Vector.new(values1.map { |v| v - 5 }, name: :boozy, index: indexes1))
71
+ end
72
+ end
73
+
74
+ describe "#*" do
75
+ it "multiplies matching indexes of the other vector" do
76
+ expect(dv1 * dv2).to eq(
77
+ DaruLite::Vector.new([nil, 16, nil, 6, nil, nil], name: :boozy, index: indexes1_and_2)
78
+ )
79
+ end
80
+
81
+ it "multiplies number to each element of the entire vector" do
82
+ expect(dv1 * 5).to eq(DaruLite::Vector.new(values1.map { |v| v * 5 }, name: :boozy, index: indexes1))
83
+ end
84
+ end
85
+
86
+ describe "#\/" do
87
+ let(:values2) { [1.0, 2.0, 3.0, 4.0] }
88
+
89
+ it "divides matching indexes of the other vector" do
90
+ expect(dv1 / dv2).to eq(
91
+ DaruLite::Vector.new([nil, 1.0, nil, 2 / 3.to_f, nil, nil], name: :boozy, index: indexes1_and_2)
92
+ )
93
+ end
94
+
95
+ it "divides number from each element of the entire vector" do
96
+ expect(dv1 / 5.0).to eq(DaruLite::Vector.new(values1.map { |v| v / 5.0 }, name: :boozy, index: indexes1))
97
+ end
98
+ end
99
+
100
+ describe "#%" do
101
+ it "applies % to matching indexes of the other vector" do
102
+ expect(dv1 % dv2).to eq(DaruLite::Vector.new([nil, 0, nil, 2, nil, nil], name: :boozy, index: indexes1_and_2))
103
+ end
104
+
105
+ it "applies % for each element of the entire vector" do
106
+ expect(dv1 % 5).to eq(
107
+ DaruLite::Vector.new(values1.map { |v| v % 5 }, name: :boozy, index: indexes1)
108
+ )
109
+ end
110
+ end
111
+
112
+ describe "#**" do
113
+ it "applies ** to matching indexes of the other vector" do
114
+ expect(dv1 ** dv2).to eq(DaruLite::Vector.new([nil, 256, nil, 8, nil, nil], name: :boozy, index: indexes1_and_2))
115
+ end
116
+
117
+ it "applies ** for each element of the entire vector" do
118
+ expect(dv1 ** 5).to eq(DaruLite::Vector.new(values1.map { |v| v ** 5 }, name: :boozy, index: indexes1))
119
+ end
120
+ end
121
+
122
+ describe "#exp" do
123
+ it "calculates exp of all numbers" do
124
+ expect(with_md1.exp.round(3)).to eq(
125
+ DaruLite::Vector.new(
126
+ [2.718281828459045, 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil],
127
+ index: indexes_with_md1,
128
+ name: :missing
129
+ ).round(3)
130
+ )
131
+ end
132
+ end
133
+
134
+ describe "#add" do
135
+ it "adds two vectors with nils as 0 if skipnil is true" do
136
+ expect(with_md1.add(with_md2, skipnil: true)).to eq(
137
+ DaruLite::Vector.new([1, 7, 3, 3, 1, 7], name: :missing, index: indexes_with_md1_and_2)
138
+ )
139
+ end
140
+
141
+ it "adds two vectors same as :+ if skipnil is false" do
142
+ expect(with_md1.add(with_md2, skipnil: false)).to eq(
143
+ DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
144
+ )
145
+ end
146
+ end
147
+
148
+ describe "#abs" do
149
+ it "calculates abs value" do
150
+ with_md1.abs
151
+ end
152
+ end
153
+
154
+ describe "#sqrt" do
155
+ it "calculates sqrt" do
156
+ with_md1.sqrt
157
+ end
158
+ end
159
+
160
+ describe "#round" do
161
+ it "rounds to given precision" do
162
+ with_md1.round(2)
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,178 @@
1
+ describe DaruLite::DataFrame do
2
+ before do
3
+ @df = DaruLite::DataFrame.new({
4
+ a: ['foo' , 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
5
+ b: ['one' , 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
6
+ c: ['small','large','large','small','small','large','small','large','small'],
7
+ d: [1,2,2,3,3,4,5,6,7],
8
+ e: [2,4,4,6,6,8,10,12,14],
9
+ f: [10,20,20,30,30,40,50,60,70]
10
+ })
11
+ end
12
+
13
+ context "#mean" do
14
+ it "calculates mean of single level numeric only vectors and returns values in a Vector" do
15
+ expect(@df.mean.round(2)).to eq(DaruLite::Vector.new([3.67, 7.33, 36.67],
16
+ index: [:d, :e, :f]
17
+ ))
18
+ end
19
+
20
+ it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
21
+ # TODO - pending
22
+ end
23
+ end
24
+
25
+ context "#variance_sample" do
26
+ it "calculates variance of single level numeric only vectors and returns values in a Vector" do
27
+ expect(@df.variance_sample).to eq(DaruLite::Vector.new([4.0, 16.0, 400.0], index: [:d, :e, :f]))
28
+ end
29
+ end
30
+
31
+ context "#std" do
32
+ it "calculates standard deviation of single level numeric only vectors and returns values in a Vector" do
33
+ expect(@df.std).to eq(DaruLite::Vector.new([2, 4, 20], index: [:d, :e, :f]))
34
+ end
35
+ end
36
+
37
+ context "#sum" do
38
+ it "calculates sum of single level numeric only vectors and returns values in a Vector" do
39
+ expect(@df.sum).to eq(DaruLite::Vector.new([33, 66, 330], index: [:d, :e, :f]))
40
+ end
41
+ end
42
+
43
+ context "#count" do
44
+ it "counts number of non-nil single level numeric only vectors and returns values in a Vector" do
45
+ expect(@df.count).to eq(DaruLite::Vector.new([9, 9, 9], index: [:d, :e, :f]))
46
+ end
47
+ end
48
+
49
+ context "#mode" do
50
+ it "calculates mode of single level numeric only vectors and returns values in a Vector" do
51
+ expect(@df.mode).to eq(DaruLite::Vector.new([DaruLite::Vector.new([2,3]), DaruLite::Vector.new([4,6]), DaruLite::Vector.new([20,30])], index: [:d, :e, :f]))
52
+ end
53
+ end
54
+
55
+ context "#median" do
56
+ it "calculates median of single level numeric only vectors and returns values in a Vector" do
57
+ expect(@df.median).to eq(DaruLite::Vector.new([3, 6, 30], index: [:d, :e, :f]))
58
+ end
59
+ end
60
+
61
+ context "#max" do
62
+ it "returns the row that has max" do
63
+ df = DaruLite::DataFrame.new({
64
+ a: [1,2,3,4,5],
65
+ b: ['aa','aaa','a','','dfffdf'],
66
+ c: [11,22,33,44,55]
67
+ })
68
+ expect(df.max(vector: :b)).to eq(
69
+ DaruLite::Vector.new([5,'dfffdf',55], index: [:a, :b, :c]))
70
+ end
71
+ end
72
+
73
+ context "#min" do
74
+ it "calculates mininum of single level numeric only vectors and returns values in a Vector" do
75
+ expect(@df.min).to eq(DaruLite::Vector.new([1, 2, 10], index: [:d, :e, :f]))
76
+ end
77
+ end
78
+
79
+ context "#range" do
80
+ it "calculates range of single level numeric only vectors and returns values in a Vector" do
81
+ expect(@df.range).to eq(DaruLite::Vector.new([6, 12, 60], index: [:d, :e, :f]))
82
+ end
83
+ end
84
+
85
+ context "#product" do
86
+ it "calculates product of single level numeric only vectors and returns values in a Vector" do
87
+ expect(@df.product).to eq(DaruLite::Vector.new([30240, 15482880, 30240000000000], index: [:d, :e, :f]))
88
+ end
89
+ end
90
+
91
+ context "#describe" do
92
+ it "generates mean, std, max, min and count of numeric vectors in one shot" do
93
+ expect(@df.describe.round(2)).to eq(DaruLite::DataFrame.new({
94
+ d: [9.00, 3.67 ,2.00 , 1.00, 7.00],
95
+ e: [9.00, 7.33 ,4.00 , 2.00, 14.00],
96
+ f: [9.00, 36.67,20.00,10.00, 70.00]
97
+ }, index: [:count, :mean, :std, :min, :max]
98
+ ))
99
+ end
100
+ end
101
+
102
+ context "percent_change" do
103
+ it "calculates percent change of numeric vectors" do
104
+ expect(@df.percent_change.round(2)).to eq(DaruLite::DataFrame.new({
105
+ d: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
106
+ e: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
107
+ f: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17] }
108
+ ))
109
+ end
110
+ end
111
+
112
+ context "#cov" do
113
+ it "calculates the variance covariance of the numeric vectors of DataFrame" do
114
+ expect(@df.cov).to eq(DaruLite::DataFrame.new({
115
+ d: [4,8,40],
116
+ e: [8,16,80],
117
+ f: [40,80,400]
118
+ }, index: [:d, :e, :f]
119
+ ))
120
+
121
+ test = DaruLite::DataFrame.rows([
122
+ [0.3543,0.4535,0.2424],
123
+ [0.123,0.53323,0.544],
124
+ [0.4345,0.4552,0.425]
125
+ ], order: [:a, :b, :c])
126
+ ans = DaruLite::DataFrame.new({
127
+ a: [0.0261607, -0.0071019, -0.0153640],
128
+ b: [-0.0071019, 0.0020747, 0.0056071],
129
+ c: [-0.0153640, 0.0056071, 0.0230777]
130
+ })
131
+
132
+ test.cov.each_vector_with_index do |v, i|
133
+ expect_correct_vector_in_delta v, ans[i], 0.01
134
+ end
135
+ end
136
+ end
137
+
138
+ context "#corr" do
139
+ it "calculates the correlation between the numeric vectors of DataFrame" do
140
+ expect(@df.corr).to eq(DaruLite::DataFrame.new({
141
+ d: [1,1,1],
142
+ e: [1,1,1],
143
+ f: [1,1,1]
144
+ }, index: [:d, :e, :f]
145
+ ))
146
+ end
147
+ end
148
+
149
+ context "#cumsum" do
150
+ it "calculates cumulative sum of numeric vectors" do
151
+ answer = DaruLite::DataFrame.new({
152
+ d: [1,3,5,8,11,15,20,26,33],
153
+ e: [2,6,10,16,22,30,40,52,66],
154
+ f: [10,30,50,80,110,150,200,260,330]
155
+ })
156
+ expect(@df.cumsum).to eq(answer)
157
+ end
158
+ end
159
+
160
+ context "#rolling_mean" do
161
+ it "calculates rolling mean" do
162
+ v = DaruLite::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49, 17.46, 17.4, 17.03, 17.01,16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
163
+ df = DaruLite::DataFrame.new({ a: v, b: v, c: v })
164
+ answer = df.rolling_mean
165
+
166
+ expect(answer[:a][-1]) .to be_within(0.001).of(16.897)
167
+ expect(answer[:b][-5]) .to be_within(0.001).of(17.233)
168
+ expect(answer[:c][-10]).to be_within(0.001).of(17.587)
169
+ end
170
+ end
171
+
172
+ context "#standardize" do
173
+ it "standardizes" do
174
+ # TODO: Write this test.
175
+ @df.standardize
176
+ end
177
+ end
178
+ end