daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,105 @@
|
|
1
|
+
# Specs for the HTML rendering of DaruLite::Vector (#to_html).
#
# The generated markup is parsed with Nokogiri and inspected through three
# shared helpers: `doc` (the parsed document), `table` (the first <table>,
# also the default subject) and `header` (the first <b> element).
# Every group runs twice: once with `type: nil` and once with
# `type: :category`.
describe DaruLite::Vector, '#to_html' do
  [nil, :category].each do |type|
    # One context per vector type, so the two passes are distinguishable in
    # the spec output instead of producing identically named groups.
    context "with type #{type.inspect}" do
      subject(:table) { doc.at('table') }

      let(:doc) { Nokogiri::HTML(vector.to_html) }
      let(:header) { doc.at('b') }

      context 'simple' do
        let(:vector) do
          DaruLite::Vector.new([1, nil, 3], index: %i[a b c], name: 'test', type: type)
        end

        it { is_expected.not_to be_nil }

        describe 'header' do
          subject { header }

          it { is_expected.not_to be_nil }

          # Category vectors get a ":category" suffix after the size.
          its(:text) do
            is_expected.to eq " DaruLite::Vector(3)#{':category' if type == :category} "
          end
        end

        describe 'name' do
          subject(:name) { table.at('tr:nth-child(1) > th:nth-child(2)') }

          it { is_expected.not_to be_nil }
          its(:text) { is_expected.to eq 'test' }

          context 'without name' do
            let(:vector) do
              DaruLite::Vector.new([1, nil, 3], index: %i[a b c], type: type)
            end

            # No name cell is rendered when the vector has no name.
            it { is_expected.to be_nil }
          end
        end

        describe 'index' do
          subject(:indexes) { table.search('tr > td:first-child').map(&:text) }

          its(:count) { is_expected.to eq vector.size }
          it { is_expected.to eq vector.index.to_a.map(&:to_s) }
        end

        describe 'values' do
          subject(:values) { table.search('tr > td:last-child').map(&:text) }

          its(:count) { is_expected.to eq vector.size }
          # nil is expected to render as an empty string (nil.to_s).
          it { is_expected.to eq vector.to_a.map(&:to_s) }
        end
      end

      context 'large vector' do
        # The vector is only an input here, so it is a `let`; the subject
        # stays the rendered table.
        let(:vector) { DaruLite::Vector.new([1, 2, 3] * 100, name: 'test', type: type) }

        it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
          expect(table.search('tr').size).to eq 33
        end

        describe '"skipped" row' do
          subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }

          its(:count) { is_expected.to eq 2 }
          it { is_expected.to eq ['...', '...'] }
        end

        describe 'last row' do
          subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }

          its(:count) { is_expected.to eq 2 }
          it { is_expected.to eq %w[299 3] }
        end
      end

      context 'multi-index' do
        let(:tuples) do
          [
            %w[foo one],
            %w[foo two],
            %w[foo three],
            %w[bar one],
            %w[bar two],
            %w[bar three],
            %w[baz one]
          ]
        end

        let(:vector) do
          DaruLite::Vector.new(
            [1, 2, 3, 4, 5, 6, 7],
            name: 'test',
            type: type,
            index: DaruLite::MultiIndex.from_tuples(tuples)
          )
        end

        describe 'header' do
          subject { header }

          it { is_expected.not_to be_nil }

          its(:text) do
            is_expected.to eq " DaruLite::Vector(7)#{':category' if type == :category} "
          end
        end

        describe 'name row' do
          subject(:row) { table.at('tr:nth-child(1)').search('th') }

          its(:count) { is_expected.to eq 2 }
          # The first header cell spans both index levels.
          it { expect(row.first['colspan']).to eq '2' }
        end

        describe 'first data row' do
          subject { row.inner_html.scan(%r{<t[dh].+?</t[dh]>}) }

          let(:row) { table.at('tbody > tr:first-child') }

          it do
            is_expected.to eq [
              '<th rowspan="3">foo</th>',
              '<th rowspan="1">one</th>',
              '<td>1</td>'
            ]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
# Specs for arithmetic on DaruLite::DataFrame: the binary operators
# (+, -, *, /, %, **) applied with a scalar and with another data frame, plus
# the #sqrt, #round and #exp helpers.
#
# Fixtures:
#   df    - two numeric vectors (:a, :c) and one string vector (:b)
#   left  - numeric frame containing nils, indexed [0, 4, 5, 3]
#   right - numeric frame without :c, indexed [0, 1, 2, 3, 6]
#
# NOTE(review): the expected frames for the frame-with-frame examples list six
# values per vector against a seven-label index. They are reproduced exactly
# as found; confirm this is intentional.
describe DaruLite::DataFrame do
  let(:df) do
    DaruLite::DataFrame.new({ a: [1, 2, 3, 4, 5], b: %w[a e i o u], c: [10, 20, 30, 40, 50] })
  end

  let(:left) do
    DaruLite::DataFrame.new(
      { a: [1, nil, nil, 4], b: [10, nil, nil, 40], c: [5, 6, 7, 8] },
      index: [0, 4, 5, 3]
    )
  end

  let(:right) do
    DaruLite::DataFrame.new(
      { a: [1, 2, 3, 4, 5], b: [10, 20, 30, 40, 50] },
      index: [0, 1, 2, 3, 6]
    )
  end

  describe '#+' do
    it 'adds a number to all numeric vectors' do
      expected = DaruLite::DataFrame.new({ a: [3, 4, 5, 6, 7], b: %w[a e i o u], c: [12, 22, 32, 42, 52] })

      expect(df + 2).to eq(expected)
    end

    it 'adds two dataframes to produce a third' do
      expected = DaruLite::DataFrame.new(
        {
          a: [2, nil, nil, 8, nil, nil],
          b: [20, nil, nil, 80, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left + right).to eq(expected)
    end
  end

  describe '#-' do
    it 'subtracts a number from all numeric vectors' do
      expected = DaruLite::DataFrame.new({ a: [-1, 0, 1, 2, 3], b: %w[a e i o u], c: [8, 18, 28, 38, 48] })

      expect(df - 2).to eq(expected)
    end

    it 'subtracts a data frame from another' do
      expected = DaruLite::DataFrame.new(
        {
          a: [0, nil, nil, 0, nil, nil],
          b: [0, nil, nil, 0, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left - right).to eq(expected)
    end
  end

  describe '#*' do
    it 'multiplies all numeric vectors by number' do
      expected = DaruLite::DataFrame.new({ a: [2, 4, 6, 8, 10], b: %w[a e i o u], c: [20, 40, 60, 80, 100] })

      expect(df * 2).to eq(expected)
    end

    it 'multiplies two dataframes to produce a third' do
      expected = DaruLite::DataFrame.new(
        {
          a: [1, nil, nil, 16, nil, nil],
          b: [100, nil, nil, 1600, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left * right).to eq(expected)
    end
  end

  describe '#/' do
    it 'divides all numeric vectors by number' do
      expected = DaruLite::DataFrame.new({ a: [0.5, 1, 1.5, 2, 2.5], b: %w[a e i o u], c: [5, 10, 15, 20, 25] })

      expect(df / 2.0).to eq(expected)
    end

    it 'divides two dataframes to produce a third' do
      # NB: this and other tests of two DF interactions are not EXTREMELY
      # useful, but to know that nothing unexpected emerges here
      expected = DaruLite::DataFrame.new(
        {
          a: [1, nil, nil, 1, nil, nil],
          b: [1, nil, nil, 1, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left / right).to eq(expected)
    end
  end

  describe '#%' do
    it 'divides all numeric vectors by number and returns remainder' do
      expected = DaruLite::DataFrame.new({ a: [1, 2, 0, 1, 2], b: %w[a e i o u], c: [1, 2, 0, 1, 2] })

      expect(df % 3).to eq(expected)
    end

    it 'returns remainder of per-item division' do
      expected = DaruLite::DataFrame.new(
        {
          a: [0, nil, nil, 0, nil, nil],
          b: [0, nil, nil, 0, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left % right).to eq(expected)
    end
  end

  describe '#**' do
    it 'calculates result of each numeric value pow' do
      expected = DaruLite::DataFrame.new(
        { a: [1, 4, 9, 16, 25], b: %w[a e i o u], c: [100, 400, 900, 1600, 2500] }
      )

      expect(df**2).to eq(expected)
    end

    it 'returns per-item pow' do
      expected = DaruLite::DataFrame.new(
        {
          a: [1, nil, nil, 4**4, nil, nil],
          b: [10**10, nil, nil, 40**40, nil, nil],
          c: [nil, nil, nil, nil, nil, nil]
        },
        index: [0, 1, 2, 3, 4, 5, 6]
      )

      expect(left**right).to eq(expected)
    end
  end

  describe '#sqrt' do
    it 'calculates sqrt' do
      # The expected frame has no :b vector; only the numeric vectors are listed.
      expected = DaruLite::DataFrame.new(
        {
          a: [1.0, 1.41421356, 1.73205080, 2.0, 2.23606797],
          c: [3.16227766, 4.47213595, 5.47722557, 6.32455532, 7.07106781]
        }
      )

      # expect_correct_df_in_delta is a helper defined outside this file
      # (presumably in the spec support code).
      expect_correct_df_in_delta(df.sqrt, expected, 0.001)
    end
  end

  describe '#round' do
    it 'rounds to precision' do
      unrounded = DaruLite::DataFrame.new(
        {
          a: [1.3434, 2.4332, 5.6655, 12.3344, 32.233],
          b: [1.3434, 2.4332, 5.6655, 12.3344, 32.233],
          c: %w[a b c d e]
        }
      )
      # The string vector :c is absent from the expected result.
      expected = DaruLite::DataFrame.new(
        {
          a: [1.34, 2.43, 5.67, 12.33, 32.23],
          b: [1.34, 2.43, 5.67, 12.33, 32.23]
        }
      )

      expect(unrounded.round(2)).to eq(expected)
    end
  end

  describe '#exp' do
    it 'calculates exponential' do
      e = Math::E
      input = DaruLite::DataFrame.new(
        {
          a: [1, 2, 3],
          b: [4, 5, 6],
          c: %w[a b c]
        }
      )
      # The string vector :c is absent from the expected result.
      expected = DaruLite::DataFrame.new(
        {
          a: [e, e**2, e**3],
          b: [e**4, e**5, e**6]
        }
      )

      expect_correct_df_in_delta(input.exp, expected, 0.0001)
    end
  end
end
|
@@ -0,0 +1,165 @@
|
|
1
|
+
# Specs for arithmetic on DaruLite::Vector: the binary operators
# (+, -, *, /, %, **) applied with a scalar and with another vector, plus
# #exp, #add, #abs, #sqrt and #round.
#
# Fixtures:
#   dv1 / dv2           - vectors built from values1/values2 and
#                         indexes1/indexes2; nested contexts override these
#                         `let`s to vary the inputs
#   with_md1 / with_md2 - vectors containing nils ("missing data") under two
#                         different index orders
describe DaruLite::Vector do
  let(:dv1) { described_class.new(values1, name: :boozy, index: indexes1) }
  let(:dv2) { described_class.new(values2, name: :mayer, index: indexes2) }

  let(:with_md1) do
    described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: indexes_with_md1)
  end

  let(:with_md2) do
    described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: %i[obi wan corona a b c])
  end

  let(:values1) { [1, 2, 3, 4] }
  let(:values2) { [1, 2, 3, 4] }
  let(:indexes1) { %i[bud kf henie corona] }
  let(:indexes2) { %i[obi wan kf corona] }
  # Index labels expected in the result of combining dv1 and dv2.
  let(:indexes1_and_2) { %i[bud corona henie kf obi wan] }
  let(:indexes_with_md1) { %i[a b c obi wan corona] }
  # Index labels expected in the result of combining with_md1 and with_md2.
  let(:indexes_with_md1_and_2) { %i[a b c corona obi wan] }

  describe '#+' do
    it 'adds matching indexes of the other vector' do
      expect(dv1 + dv2).to eq(
        described_class.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'adds number to each element of the entire vector' do
      expect(dv1 + 5).to eq(described_class.new(values1.map { |v| v + 5 }, name: :boozy, index: indexes1))
    end

    it 'keeps nil elements as nil when a number is added' do
      expect(with_md1 + 1).to eq(
        described_class.new([2, 3, 4, nil, 6, nil], name: :missing, index: indexes_with_md1)
      )
    end

    it 'puts a nil when one of the operands is nil' do
      expect(with_md1 + with_md2).to eq(
        described_class.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end

    context 'when vectors have numeric and non-numeric indexes' do
      # A nil index is passed for dv1; the expectation below assumes it is
      # then indexed 0..3.
      let(:indexes1) { nil }
      let(:indexes2) { %i[a b c d] }

      it 'appropriately adds vectors with numeric and non-numeric indexes' do
        expect(dv1 + dv2).to eq(described_class.new(Array.new(6), index: [0, 1, 2, 3] + indexes2))
      end
    end

    context 'when index contains symbols and strings' do
      let(:indexes1) { [:bud, 'kf', :henie, :corona] }
      let(:indexes2) { [:obi, :wan, 'kf', :corona] }

      it 'adds matching indexes of the other vector' do
        expect(dv1 + dv2).to eq(
          described_class.new(
            [nil, 8, nil, 5, nil, nil],
            name: :boozy,
            index: [:bud, :corona, :henie, 'kf', :obi, :wan]
          )
        )
      end
    end
  end

  describe '#-' do
    it 'subtracts matching indexes of the other vector' do
      expect(dv1 - dv2).to eq(
        described_class.new([nil, 0, nil, -1, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'subtracts number from each element of the entire vector' do
      expect(dv1 - 5).to eq(described_class.new(values1.map { |v| v - 5 }, name: :boozy, index: indexes1))
    end
  end

  describe '#*' do
    it 'multiplies matching indexes of the other vector' do
      expect(dv1 * dv2).to eq(
        described_class.new([nil, 16, nil, 6, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'multiplies each element of the entire vector by a number' do
      expect(dv1 * 5).to eq(described_class.new(values1.map { |v| v * 5 }, name: :boozy, index: indexes1))
    end
  end

  describe '#/' do
    # Float divisors so the expected results are not truncated.
    let(:values2) { [1.0, 2.0, 3.0, 4.0] }

    it 'divides matching indexes of the other vector' do
      expect(dv1 / dv2).to eq(
        described_class.new([nil, 1.0, nil, 2 / 3.to_f, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'divides each element of the entire vector by a number' do
      expect(dv1 / 5.0).to eq(described_class.new(values1.map { |v| v / 5.0 }, name: :boozy, index: indexes1))
    end
  end

  describe '#%' do
    it 'applies % to matching indexes of the other vector' do
      expect(dv1 % dv2).to eq(
        described_class.new([nil, 0, nil, 2, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'applies % for each element of the entire vector' do
      expect(dv1 % 5).to eq(
        described_class.new(values1.map { |v| v % 5 }, name: :boozy, index: indexes1)
      )
    end
  end

  describe '#**' do
    it 'applies ** to matching indexes of the other vector' do
      expect(dv1**dv2).to eq(
        described_class.new([nil, 256, nil, 8, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it 'applies ** for each element of the entire vector' do
      expect(dv1**5).to eq(described_class.new(values1.map { |v| v**5 }, name: :boozy, index: indexes1))
    end
  end

  describe '#exp' do
    it 'calculates exp of all numbers' do
      # Both sides are rounded to 3 places so float noise cannot fail the match.
      expected = described_class.new(
        [2.718281828459045, 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil],
        index: indexes_with_md1,
        name: :missing
      ).round(3)

      expect(with_md1.exp.round(3)).to eq(expected)
    end
  end

  describe '#add' do
    it 'adds two vectors with nils as 0 if skipnil is true' do
      expect(with_md1.add(with_md2, skipnil: true)).to eq(
        described_class.new([1, 7, 3, 3, 1, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end

    it 'adds two vectors same as :+ if skipnil is false' do
      expect(with_md1.add(with_md2, skipnil: false)).to eq(
        described_class.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end
  end

  # The three groups below are smoke tests: they only check that the call
  # completes on a vector containing nils.
  # TODO: assert the resulting values as well.
  describe '#abs' do
    it 'calculates abs value without raising' do
      expect { with_md1.abs }.not_to raise_error
    end
  end

  describe '#sqrt' do
    it 'calculates sqrt without raising' do
      expect { with_md1.sqrt }.not_to raise_error
    end
  end

  describe '#round' do
    it 'rounds to given precision without raising' do
      expect { with_md1.round(2) }.not_to raise_error
    end
  end
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
# Specs for the statistics helpers of DaruLite::DataFrame (#mean, #std, #sum,
# #describe, #cov, #corr, #cumsum, #rolling_mean, ...).
#
# The shared fixture `df` has three string vectors (:a, :b, :c) and three
# numeric vectors (:d, :e, :f) where e == 2 * d and f == 10 * d. Most
# expectations are indexed by the numeric vectors only.
describe DaruLite::DataFrame do
  let(:df) do
    DaruLite::DataFrame.new(
      {
        a: %w[foo foo foo foo foo bar bar bar bar],
        b: %w[one one one two two one one two two],
        c: %w[small large large small small large small large small],
        d: [1, 2, 2, 3, 3, 4, 5, 6, 7],
        e: [2, 4, 4, 6, 6, 8, 10, 12, 14],
        f: [10, 20, 20, 30, 30, 40, 50, 60, 70]
      }
    )
  end

  # Names of the numeric vectors of `df`; used as the index of most results.
  let(:numeric_vectors) { %i[d e f] }

  describe '#mean' do
    it 'calculates mean of single level numeric only vectors and returns values in a Vector' do
      expect(df.mean.round(2)).to eq(DaruLite::Vector.new([3.67, 7.33, 36.67], index: numeric_vectors))
    end

    # Declared without a block so RSpec reports it as pending instead of
    # passing vacuously.
    it 'calculates mean of multi level numeric only vectors and returns values in a DataFrame'
  end

  describe '#variance_sample' do
    it 'calculates variance of single level numeric only vectors and returns values in a Vector' do
      expect(df.variance_sample).to eq(DaruLite::Vector.new([4.0, 16.0, 400.0], index: numeric_vectors))
    end
  end

  describe '#std' do
    it 'calculates standard deviation of single level numeric only vectors and returns values in a Vector' do
      expect(df.std).to eq(DaruLite::Vector.new([2, 4, 20], index: numeric_vectors))
    end
  end

  describe '#sum' do
    it 'calculates sum of single level numeric only vectors and returns values in a Vector' do
      expect(df.sum).to eq(DaruLite::Vector.new([33, 66, 330], index: numeric_vectors))
    end
  end

  describe '#count' do
    it 'counts number of non-nil single level numeric only vectors and returns values in a Vector' do
      expect(df.count).to eq(DaruLite::Vector.new([9, 9, 9], index: numeric_vectors))
    end
  end

  describe '#mode' do
    it 'calculates mode of single level numeric only vectors and returns values in a Vector' do
      # Each numeric vector has two equally frequent values, so every mode is
      # itself a Vector.
      modes = [
        DaruLite::Vector.new([2, 3]),
        DaruLite::Vector.new([4, 6]),
        DaruLite::Vector.new([20, 30])
      ]

      expect(df.mode).to eq(DaruLite::Vector.new(modes, index: numeric_vectors))
    end
  end

  describe '#median' do
    it 'calculates median of single level numeric only vectors and returns values in a Vector' do
      expect(df.median).to eq(DaruLite::Vector.new([3, 6, 30], index: numeric_vectors))
    end
  end

  describe '#max' do
    it 'returns the row that has max' do
      frame = DaruLite::DataFrame.new(
        {
          a: [1, 2, 3, 4, 5],
          b: ['aa', 'aaa', 'a', '', 'dfffdf'],
          c: [11, 22, 33, 44, 55]
        }
      )

      expect(frame.max(vector: :b)).to eq(
        DaruLite::Vector.new([5, 'dfffdf', 55], index: %i[a b c])
      )
    end
  end

  describe '#min' do
    it 'calculates minimum of single level numeric only vectors and returns values in a Vector' do
      expect(df.min).to eq(DaruLite::Vector.new([1, 2, 10], index: numeric_vectors))
    end
  end

  describe '#range' do
    it 'calculates range of single level numeric only vectors and returns values in a Vector' do
      expect(df.range).to eq(DaruLite::Vector.new([6, 12, 60], index: numeric_vectors))
    end
  end

  describe '#product' do
    it 'calculates product of single level numeric only vectors and returns values in a Vector' do
      expect(df.product).to eq(
        DaruLite::Vector.new([30_240, 15_482_880, 30_240_000_000_000], index: numeric_vectors)
      )
    end
  end

  describe '#describe' do
    it 'generates mean, std, max, min and count of numeric vectors in one shot' do
      expected = DaruLite::DataFrame.new(
        {
          d: [9.00, 3.67, 2.00, 1.00, 7.00],
          e: [9.00, 7.33, 4.00, 2.00, 14.00],
          f: [9.00, 36.67, 20.00, 10.00, 70.00]
        },
        index: %i[count mean std min max]
      )

      expect(df.describe.round(2)).to eq(expected)
    end
  end

  describe '#percent_change' do
    it 'calculates percent change of numeric vectors' do
      # e and f are multiples of d, so all three share the same relative changes.
      changes = [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17]
      expected = DaruLite::DataFrame.new({ d: changes, e: changes, f: changes })

      expect(df.percent_change.round(2)).to eq(expected)
    end
  end

  describe '#cov' do
    it 'calculates the variance covariance of the numeric vectors of DataFrame' do
      expect(df.cov).to eq(
        DaruLite::DataFrame.new(
          {
            d: [4, 8, 40],
            e: [8, 16, 80],
            f: [40, 80, 400]
          },
          index: numeric_vectors
        )
      )

      sample = DaruLite::DataFrame.rows(
        [
          [0.3543, 0.4535, 0.2424],
          [0.123, 0.53323, 0.544],
          [0.4345, 0.4552, 0.425]
        ],
        order: %i[a b c]
      )
      expected = DaruLite::DataFrame.new(
        {
          a: [0.0261607, -0.0071019, -0.0153640],
          b: [-0.0071019, 0.0020747, 0.0056071],
          c: [-0.0153640, 0.0056071, 0.0230777]
        }
      )

      # expect_correct_vector_in_delta is a helper defined outside this file
      # (presumably in the spec support code).
      sample.cov.each_vector_with_index do |vector, name|
        expect_correct_vector_in_delta vector, expected[name], 0.01
      end
    end
  end

  describe '#corr' do
    it 'calculates the correlation between the numeric vectors of DataFrame' do
      # d, e and f are exact multiples of each other, hence correlation 1.
      expect(df.corr).to eq(
        DaruLite::DataFrame.new(
          {
            d: [1, 1, 1],
            e: [1, 1, 1],
            f: [1, 1, 1]
          },
          index: numeric_vectors
        )
      )
    end
  end

  describe '#cumsum' do
    it 'calculates cumulative sum of numeric vectors' do
      expected = DaruLite::DataFrame.new(
        {
          d: [1, 3, 5, 8, 11, 15, 20, 26, 33],
          e: [2, 6, 10, 16, 22, 30, 40, 52, 66],
          f: [10, 30, 50, 80, 110, 150, 200, 260, 330]
        }
      )

      expect(df.cumsum).to eq(expected)
    end
  end

  describe '#rolling_mean' do
    it 'calculates rolling mean' do
      prices = DaruLite::Vector.new(
        [
          17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49,
          17.46, 17.4, 17.03, 17.01, 16.86, 16.86, 16.56, 16.36, 16.66, 16.77
        ]
      )
      rolled = DaruLite::DataFrame.new({ a: prices, b: prices, c: prices }).rolling_mean

      expect(rolled[:a][-1]).to be_within(0.001).of(16.897)
      expect(rolled[:b][-5]).to be_within(0.001).of(17.233)
      expect(rolled[:c][-10]).to be_within(0.001).of(17.587)
    end
  end

  describe '#standardize' do
    # Smoke test only: checks that the call completes.
    # TODO: assert the standardized values.
    it 'standardizes without raising' do
      expect { df.standardize }.not_to raise_error
    end
  end
end
|