daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# Specs for the HTML produced by DaruLite::Vector#to_html.
#
# Every context below is generated twice: once with `type` set to nil and once
# with `type` set to :category. The rendered markup is parsed with Nokogiri and
# inspected through CSS selectors.
describe DaruLite::Vector, '#to_html' do
  [nil, :category].each do |type|
    # `vector` is supplied by each context; everything else derives from it.
    let(:doc) { Nokogiri::HTML(vector.to_html) }
    subject(:table) { doc.at('table') }
    let(:header) { doc.at('b') }

    context 'simple' do
      let(:vector) do
        DaruLite::Vector.new [1, nil, 3], index: [:a, :b, :c], name: 'test', type: type
      end

      it { is_expected.not_to be_nil }

      describe 'header' do
        subject { header }

        it { is_expected.not_to be_nil }
        # The header text carries a ":category" suffix only for category vectors.
        its(:text) do
          is_expected.to eq " DaruLite::Vector(3)#{':category' if type == :category} "
        end
      end

      describe 'name' do
        subject(:name) { table.at('tr:nth-child(1) > th:nth-child(2)') }

        it { is_expected.not_to be_nil }
        its(:text) { is_expected.to eq 'test' }

        context 'without name' do
          let(:vector) { DaruLite::Vector.new [1, nil, 3], index: [:a, :b, :c], type: type }

          it { is_expected.to be_nil }
        end
      end

      describe 'index' do
        subject(:indexes) { table.search('tr > td:first-child').map(&:text) }

        its(:count) { is_expected.to eq vector.size }
        it { is_expected.to eq vector.index.to_a.map(&:to_s) }
      end

      describe 'values' do
        subject(:values) { table.search('tr > td:last-child').map(&:text) }

        its(:count) { is_expected.to eq vector.size }
        it { is_expected.to eq vector.to_a.map(&:to_s) }
      end
    end

    context 'large vector' do
      # Declared with `let` (as in the 'simple' context) so the implicit subject
      # stays the rendered table; no example here uses the vector as subject.
      let(:vector) { DaruLite::Vector.new [1, 2, 3] * 100, name: 'test', type: type }

      it 'has only 30 rows (+ 1 header rows, + 2 finishing rows)' do
        expect(table.search('tr').size).to eq 33
      end

      describe '"skipped" row' do
        subject(:row) { table.search('tr:nth-child(31) td').map(&:text) }

        its(:count) { is_expected.to eq 2 }
        it { is_expected.to eq ['...', '...'] }
      end

      describe 'last row' do
        subject(:row) { table.search('tr:nth-child(32) td').map(&:text) }

        its(:count) { is_expected.to eq 2 }
        it { is_expected.to eq ['299', '3'] }
      end
    end

    context 'multi-index' do
      let(:vector) do
        DaruLite::Vector.new(
          [1, 2, 3, 4, 5, 6, 7],
          name: 'test',
          type: type,
          index: DaruLite::MultiIndex.from_tuples(
            [
              %w[foo one],
              %w[foo two],
              %w[foo three],
              %w[bar one],
              %w[bar two],
              %w[bar three],
              %w[baz one]
            ]
          )
        )
      end

      describe 'header' do
        subject { header }

        it { is_expected.not_to be_nil }
        its(:text) do
          is_expected.to eq " DaruLite::Vector(7)#{':category' if type == :category} "
        end
      end

      describe 'name row' do
        subject(:row) { table.at('tr:nth-child(1)').search('th') }

        its(:count) { is_expected.to eq 2 }
        # The first header cell is expected to span both index levels.
        it { expect(row.first['colspan']).to eq '2' }
      end

      describe 'first data row' do
        let(:row) { table.at('tbody > tr:first-child') }

        # Collect every <th>/<td> cell of the row as raw markup.
        subject { row.inner_html.scan(%r{<t[dh].+?</t[dh]>}) }

        it do
          is_expected.to eq [
            '<th rowspan="3">foo</th>',
            '<th rowspan="1">one</th>',
            '<td>1</td>'
          ]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Specs for arithmetic on DaruLite::DataFrame: the binary operators
# (+, -, *, /, %, **) applied with a scalar and with another data frame, plus
# #sqrt, #round and #exp.
#
# NOTE(review): in the frame-with-frame examples the expected columns list six
# values against a seven-entry index; presumably the constructor pads the
# missing trailing slot with nil -- confirm against DataFrame/Vector#initialize.
describe DaruLite::DataFrame do
  before(:each) do
    # @df mixes numeric columns (:a, :c) with a string column (:b).
    @df = DaruLite::DataFrame.new({ a: [1, 2, 3, 4, 5], b: ['a', 'e', 'i', 'o', 'u'],
                                    c: [10, 20, 30, 40, 50] })
    # @left and @right share only index labels 0 and 3 and only columns :a, :b.
    @left = DaruLite::DataFrame.new({ a: [1, nil, nil, 4], b: [10, nil, nil, 40], c: [5, 6, 7, 8] },
                                    index: [0, 4, 5, 3])
    @right = DaruLite::DataFrame.new({ a: [1, 2, 3, 4, 5], b: [10, 20, 30, 40, 50] },
                                     index: [0, 1, 2, 3, 6])
  end

  context "#+" do
    it "adds a number to all numeric vectors" do
      expected = DaruLite::DataFrame.new({ a: [3, 4, 5, 6, 7], b: ['a', 'e', 'i', 'o', 'u'],
                                           c: [12, 22, 32, 42, 52] })
      expect(@df + 2).to eq(expected)
    end

    it "adds two dataframes to produce a third" do
      expected = DaruLite::DataFrame.new({
        a: [2, nil, nil, 8, nil, nil],
        b: [20, nil, nil, 80, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left + @right).to eq(expected)
    end
  end

  context "#-" do
    it "subtracts a number from all numeric vectors" do
      expected = DaruLite::DataFrame.new({
        a: [-1, 0, 1, 2, 3],
        b: ['a', 'e', 'i', 'o', 'u'],
        c: [8, 18, 28, 38, 48]
      })
      expect(@df - 2).to eq(expected)
    end

    it "subtracts a data frame from another" do
      expected = DaruLite::DataFrame.new({
        a: [0, nil, nil, 0, nil, nil],
        b: [0, nil, nil, 0, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left - @right).to eq(expected)
    end
  end

  context "#*" do
    it "multiplies all numeric vectors by number" do
      expected = DaruLite::DataFrame.new({ a: [2, 4, 6, 8, 10], b: ['a', 'e', 'i', 'o', 'u'],
                                           c: [20, 40, 60, 80, 100] })
      expect(@df * 2).to eq(expected)
    end

    it "multiplies two dataframes to produce a third" do
      expected = DaruLite::DataFrame.new({
        a: [1, nil, nil, 16, nil, nil],
        b: [100, nil, nil, 1600, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left * @right).to eq(expected)
    end
  end

  context "#/" do
    it "divides all numeric vectors by number" do
      expected = DaruLite::DataFrame.new({ a: [0.5, 1, 1.5, 2, 2.5], b: ['a', 'e', 'i', 'o', 'u'],
                                           c: [5, 10, 15, 20, 25] })
      expect(@df / 2.0).to eq(expected)
    end

    it "divides two dataframes to produce a third" do
      # NB: this and the other frame-with-frame examples are not especially
      # useful on their own; they exist to confirm that nothing unexpected
      # happens when two data frames are combined.
      expected = DaruLite::DataFrame.new({
        a: [1, nil, nil, 1, nil, nil],
        b: [1, nil, nil, 1, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left / @right).to eq(expected)
    end
  end

  context "#%" do
    it "divides all numeric vectors by number and returns remainder" do
      expected = DaruLite::DataFrame.new({ a: [1, 2, 0, 1, 2], b: ['a', 'e', 'i', 'o', 'u'],
                                           c: [1, 2, 0, 1, 2] })
      expect(@df % 3).to eq(expected)
    end

    it "returns remainder of per-item division" do
      expected = DaruLite::DataFrame.new({
        a: [0, nil, nil, 0, nil, nil],
        b: [0, nil, nil, 0, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left % @right).to eq(expected)
    end
  end

  context "#**" do
    it "calculates result of each numeric value pow" do
      expected = DaruLite::DataFrame.new({ a: [1, 4, 9, 16, 25], b: ['a', 'e', 'i', 'o', 'u'],
                                           c: [100, 400, 900, 1600, 2500] })
      expect(@df ** 2).to eq(expected)
    end

    it "returns per-item pow" do
      expected = DaruLite::DataFrame.new({
        a: [1, nil, nil, 4**4, nil, nil],
        b: [10**10, nil, nil, 40**40, nil, nil],
        c: [nil, nil, nil, nil, nil, nil]
      }, index: [0, 1, 2, 3, 4, 5, 6])
      expect(@left ** @right).to eq(expected)
    end
  end

  context "#sqrt" do
    it "calculates sqrt" do
      # The expected frame holds only :a and :c; the string column :b is absent.
      # expect_correct_df_in_delta is a helper defined outside this file.
      expected = DaruLite::DataFrame.new({
        a: [1.0, 1.41421356, 1.73205080, 2.0, 2.23606797],
        c: [3.16227766, 4.47213595, 5.47722557, 6.32455532, 7.07106781]
      })
      expect_correct_df_in_delta(@df.sqrt, expected, 0.001)
    end
  end

  context "#round" do
    it "rounds to precision" do
      df = DaruLite::DataFrame.new({
        a: [1.3434, 2.4332, 5.6655, 12.3344, 32.233],
        b: [1.3434, 2.4332, 5.6655, 12.3344, 32.233],
        c: %w(a b c d e)
      })
      # The string column :c is not part of the expected result.
      ans = DaruLite::DataFrame.new({
        a: [1.34, 2.43, 5.67, 12.33, 32.23],
        b: [1.34, 2.43, 5.67, 12.33, 32.23]
      })

      expect(df.round(2)).to eq(ans)
    end
  end

  context "#exp" do
    it "calculates exponential" do
      e = Math::E
      df = DaruLite::DataFrame.new({
        a: [1, 2, 3],
        b: [4, 5, 6],
        c: %w(a b c)
      })
      # The string column :c is not part of the expected result.
      ans = DaruLite::DataFrame.new({
        a: [e, e**2, e**3],
        b: [e**4, e**5, e**6]
      })

      expect_correct_df_in_delta(df.exp, ans, 0.0001)
    end
  end
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Specs for arithmetic on DaruLite::Vector: the binary operators applied with
# a scalar and with another vector, plus #exp, #add, #abs, #sqrt and #round.
#
# dv1 and dv2 are built from overridable `values*` / `indexes*` lets so nested
# contexts can swap in different data; with_md1 and with_md2 contain nils.
describe DaruLite::Vector do
  let(:dv1) { described_class.new(values1, name: :boozy, index: indexes1) }
  let(:dv2) { described_class.new(values2, name: :mayer, index: indexes2) }
  let(:with_md1) do
    described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: indexes_with_md1)
  end
  let(:with_md2) do
    described_class.new([1, 2, 3, nil, 5, nil], name: :missing, index: [:obi, :wan, :corona, :a, :b, :c])
  end
  let(:values1) { [1, 2, 3, 4] }
  let(:values2) { [1, 2, 3, 4] }
  let(:indexes1) { [:bud, :kf, :henie, :corona] }
  let(:indexes2) { [:obi, :wan, :kf, :corona] }
  # Index labels of both operands combined, in the order the results use.
  let(:indexes1_and_2) { [:bud, :corona, :henie, :kf, :obi, :wan] }
  let(:indexes_with_md1) { [:a, :b, :c, :obi, :wan, :corona] }
  let(:indexes_with_md1_and_2) { [:a, :b, :c, :corona, :obi, :wan] }

  describe "#+" do
    it "adds matching indexes of the other vector" do
      expect(dv1 + dv2).to eq(
        DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it "adds number to each element of the entire vector" do
      expect(dv1 + 5).to eq(DaruLite::Vector.new(values1.map { |v| v + 5 }, name: :boozy, index: indexes1))
    end

    it "leaves nil elements as nil when a number is added" do
      expect(with_md1 + 1).to eq(
        DaruLite::Vector.new([2, 3, 4, nil, 6, nil], name: :missing, index: indexes_with_md1)
      )
    end

    it "puts a nil when one of the operands is nil" do
      expect(with_md1 + with_md2).to eq(
        DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end

    context 'when vectors have numeric and non-numeric indexes' do
      # A nil index on dv1; the expectation below uses 0..3 for its labels.
      let(:indexes1) { nil }
      let(:indexes2) { [:a, :b, :c, :d] }

      it "appropriately adds vectors with numeric and non-numeric indexes" do
        # Nothing overlaps, so every one of the six result slots is nil.
        expect(dv1 + dv2).to eq(DaruLite::Vector.new(Array.new(6), index: [0, 1, 2, 3] + indexes2))
      end
    end

    context 'when index contains symbols and strings' do
      let(:indexes1) { [:bud, 'kf', :henie, :corona] }
      let(:indexes2) { [:obi, :wan, 'kf', :corona] }

      it "adds matching indexes of the other vector" do
        expect(dv1 + dv2).to eq(
          DaruLite::Vector.new([nil, 8, nil, 5, nil, nil], name: :boozy, index: [:bud, :corona, :henie, 'kf', :obi, :wan])
        )
      end
    end
  end

  describe "#-" do
    it "subtracts matching indexes of the other vector" do
      expect(dv1 - dv2).to eq(
        DaruLite::Vector.new([nil, 0, nil, -1, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it "subtracts number from each element of the entire vector" do
      expect(dv1 - 5).to eq(DaruLite::Vector.new(values1.map { |v| v - 5 }, name: :boozy, index: indexes1))
    end
  end

  describe "#*" do
    it "multiplies matching indexes of the other vector" do
      expect(dv1 * dv2).to eq(
        DaruLite::Vector.new([nil, 16, nil, 6, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it "multiplies number to each element of the entire vector" do
      expect(dv1 * 5).to eq(DaruLite::Vector.new(values1.map { |v| v * 5 }, name: :boozy, index: indexes1))
    end
  end

  describe "#/" do
    # Float divisors so the expected quotients are floats.
    let(:values2) { [1.0, 2.0, 3.0, 4.0] }

    it "divides matching indexes of the other vector" do
      expect(dv1 / dv2).to eq(
        DaruLite::Vector.new([nil, 1.0, nil, 2 / 3.to_f, nil, nil], name: :boozy, index: indexes1_and_2)
      )
    end

    it "divides number from each element of the entire vector" do
      expect(dv1 / 5.0).to eq(DaruLite::Vector.new(values1.map { |v| v / 5.0 }, name: :boozy, index: indexes1))
    end
  end

  describe "#%" do
    it "applies % to matching indexes of the other vector" do
      expect(dv1 % dv2).to eq(DaruLite::Vector.new([nil, 0, nil, 2, nil, nil], name: :boozy, index: indexes1_and_2))
    end

    it "applies % for each element of the entire vector" do
      expect(dv1 % 5).to eq(
        DaruLite::Vector.new(values1.map { |v| v % 5 }, name: :boozy, index: indexes1)
      )
    end
  end

  describe "#**" do
    it "applies ** to matching indexes of the other vector" do
      expect(dv1 ** dv2).to eq(DaruLite::Vector.new([nil, 256, nil, 8, nil, nil], name: :boozy, index: indexes1_and_2))
    end

    it "applies ** for each element of the entire vector" do
      expect(dv1 ** 5).to eq(DaruLite::Vector.new(values1.map { |v| v ** 5 }, name: :boozy, index: indexes1))
    end
  end

  describe "#exp" do
    it "calculates exp of all numbers" do
      # Both sides are rounded to three places before comparison.
      expected = DaruLite::Vector.new(
        [2.718281828459045, 7.38905609893065, 20.085536923187668, nil, 148.4131591025766, nil],
        index: indexes_with_md1,
        name: :missing
      )
      expect(with_md1.exp.round(3)).to eq(expected.round(3))
    end
  end

  describe "#add" do
    it "adds two vectors with nils as 0 if skipnil is true" do
      expect(with_md1.add(with_md2, skipnil: true)).to eq(
        DaruLite::Vector.new([1, 7, 3, 3, 1, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end

    it "adds two vectors same as :+ if skipnil is false" do
      expect(with_md1.add(with_md2, skipnil: false)).to eq(
        DaruLite::Vector.new([nil, 7, nil, nil, nil, 7], name: :missing, index: indexes_with_md1_and_2)
      )
    end
  end

  # The three groups below are smoke checks only: they assert that the call
  # completes on a vector containing nils, not what it returns.
  describe "#abs" do
    it "calculates abs value" do
      expect { with_md1.abs }.not_to raise_error
    end
  end

  describe "#sqrt" do
    it "calculates sqrt" do
      expect { with_md1.sqrt }.not_to raise_error
    end
  end

  describe "#round" do
    it "rounds to given precision" do
      expect { with_md1.round(2) }.not_to raise_error
    end
  end
end
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# Specs for the statistics methods of DaruLite::DataFrame.
#
# @df has three string columns (:a, :b, :c) and three numeric columns
# (:d, :e, :f); most expectations below are indexed by [:d, :e, :f] only.
describe DaruLite::DataFrame do
  before do
    @df = DaruLite::DataFrame.new({
      a: ['foo', 'foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar'],
      b: ['one', 'one', 'one', 'two', 'two', 'one', 'one', 'two', 'two'],
      c: ['small', 'large', 'large', 'small', 'small', 'large', 'small', 'large', 'small'],
      d: [1, 2, 2, 3, 3, 4, 5, 6, 7],
      e: [2, 4, 4, 6, 6, 8, 10, 12, 14],
      f: [10, 20, 20, 30, 30, 40, 50, 60, 70]
    })
  end

  context "#mean" do
    it "calculates mean of single level numeric only vectors and returns values in a Vector" do
      expect(@df.mean.round(2)).to eq(
        DaruLite::Vector.new([3.67, 7.33, 36.67], index: [:d, :e, :f])
      )
    end

    it "calculates mean of multi level numeric only vectors and returns values in a DataFrame" do
      # Marked as skipped so the unwritten example is reported instead of
      # silently passing with no expectations.
      skip "TODO - pending"
    end
  end

  context "#variance_sample" do
    it "calculates variance of single level numeric only vectors and returns values in a Vector" do
      expect(@df.variance_sample).to eq(DaruLite::Vector.new([4.0, 16.0, 400.0], index: [:d, :e, :f]))
    end
  end

  context "#std" do
    it "calculates standard deviation of single level numeric only vectors and returns values in a Vector" do
      expect(@df.std).to eq(DaruLite::Vector.new([2, 4, 20], index: [:d, :e, :f]))
    end
  end

  context "#sum" do
    it "calculates sum of single level numeric only vectors and returns values in a Vector" do
      expect(@df.sum).to eq(DaruLite::Vector.new([33, 66, 330], index: [:d, :e, :f]))
    end
  end

  context "#count" do
    it "counts number of non-nil single level numeric only vectors and returns values in a Vector" do
      expect(@df.count).to eq(DaruLite::Vector.new([9, 9, 9], index: [:d, :e, :f]))
    end
  end

  context "#mode" do
    it "calculates mode of single level numeric only vectors and returns values in a Vector" do
      # Each numeric column has two equally frequent values, so every entry of
      # the expected result is itself a Vector.
      modes = [
        DaruLite::Vector.new([2, 3]),
        DaruLite::Vector.new([4, 6]),
        DaruLite::Vector.new([20, 30])
      ]
      expect(@df.mode).to eq(DaruLite::Vector.new(modes, index: [:d, :e, :f]))
    end
  end

  context "#median" do
    it "calculates median of single level numeric only vectors and returns values in a Vector" do
      expect(@df.median).to eq(DaruLite::Vector.new([3, 6, 30], index: [:d, :e, :f]))
    end
  end

  context "#max" do
    it "returns the row that has max" do
      df = DaruLite::DataFrame.new({
        a: [1, 2, 3, 4, 5],
        b: ['aa', 'aaa', 'a', '', 'dfffdf'],
        c: [11, 22, 33, 44, 55]
      })
      expect(df.max(vector: :b)).to eq(
        DaruLite::Vector.new([5, 'dfffdf', 55], index: [:a, :b, :c])
      )
    end
  end

  context "#min" do
    it "calculates minimum of single level numeric only vectors and returns values in a Vector" do
      expect(@df.min).to eq(DaruLite::Vector.new([1, 2, 10], index: [:d, :e, :f]))
    end
  end

  context "#range" do
    it "calculates range of single level numeric only vectors and returns values in a Vector" do
      expect(@df.range).to eq(DaruLite::Vector.new([6, 12, 60], index: [:d, :e, :f]))
    end
  end

  context "#product" do
    it "calculates product of single level numeric only vectors and returns values in a Vector" do
      expect(@df.product).to eq(DaruLite::Vector.new([30240, 15482880, 30240000000000], index: [:d, :e, :f]))
    end
  end

  context "#describe" do
    it "generates mean, std, max, min and count of numeric vectors in one shot" do
      expected = DaruLite::DataFrame.new({
        d: [9.00, 3.67, 2.00, 1.00, 7.00],
        e: [9.00, 7.33, 4.00, 2.00, 14.00],
        f: [9.00, 36.67, 20.00, 10.00, 70.00]
      }, index: [:count, :mean, :std, :min, :max])
      expect(@df.describe.round(2)).to eq(expected)
    end
  end

  context "#percent_change" do
    it "calculates percent change of numeric vectors" do
      expected = DaruLite::DataFrame.new({
        d: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
        e: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17],
        f: [nil, 1.0, 0.0, 0.5, 0.0, 0.33, 0.25, 0.2, 0.17]
      })
      expect(@df.percent_change.round(2)).to eq(expected)
    end
  end

  context "#cov" do
    it "calculates the variance covariance of the numeric vectors of DataFrame" do
      expect(@df.cov).to eq(
        DaruLite::DataFrame.new({
          d: [4, 8, 40],
          e: [8, 16, 80],
          f: [40, 80, 400]
        }, index: [:d, :e, :f])
      )

      # Second case: float data, compared column by column within a tolerance.
      # expect_correct_vector_in_delta is a helper defined outside this file.
      test = DaruLite::DataFrame.rows([
        [0.3543, 0.4535, 0.2424],
        [0.123, 0.53323, 0.544],
        [0.4345, 0.4552, 0.425]
      ], order: [:a, :b, :c])
      ans = DaruLite::DataFrame.new({
        a: [0.0261607, -0.0071019, -0.0153640],
        b: [-0.0071019, 0.0020747, 0.0056071],
        c: [-0.0153640, 0.0056071, 0.0230777]
      })

      test.cov.each_vector_with_index do |v, i|
        expect_correct_vector_in_delta v, ans[i], 0.01
      end
    end
  end

  context "#corr" do
    it "calculates the correlation between the numeric vectors of DataFrame" do
      expect(@df.corr).to eq(
        DaruLite::DataFrame.new({
          d: [1, 1, 1],
          e: [1, 1, 1],
          f: [1, 1, 1]
        }, index: [:d, :e, :f])
      )
    end
  end

  context "#cumsum" do
    it "calculates cumulative sum of numeric vectors" do
      answer = DaruLite::DataFrame.new({
        d: [1, 3, 5, 8, 11, 15, 20, 26, 33],
        e: [2, 6, 10, 16, 22, 30, 40, 52, 66],
        f: [10, 30, 50, 80, 110, 150, 200, 260, 330]
      })
      expect(@df.cumsum).to eq(answer)
    end
  end

  context "#rolling_mean" do
    it "calculates rolling mean" do
      v = DaruLite::Vector.new([17.28, 17.45, 17.84, 17.74, 17.82, 17.85, 17.36, 17.3, 17.56, 17.49,
                                17.46, 17.4, 17.03, 17.01, 16.86, 16.86, 16.56, 16.36, 16.66, 16.77])
      df = DaruLite::DataFrame.new({ a: v, b: v, c: v })
      # Called without arguments, so the method's default window applies.
      answer = df.rolling_mean

      expect(answer[:a][-1]).to be_within(0.001).of(16.897)
      expect(answer[:b][-5]).to be_within(0.001).of(17.233)
      expect(answer[:c][-10]).to be_within(0.001).of(17.587)
    end
  end

  context "#standardize" do
    it "standardizes" do
      # TODO: Write this test. For now it only checks that the call completes.
      expect { @df.standardize }.not_to raise_error
    end
  end
end
|