daru_lite 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
# Behaviour of DaruLite::Vector when its index is a DaruLite::DateTimeIndex:
# construction, reading through #[] and writing through #[]=.
describe DaruLite::Vector do
  context "#initialize" do
    it "accepts DateTimeIndex in index option" do
      date_index = DaruLite::DateTimeIndex.date_range(start: DateTime.new(2012, 2, 1), periods: 100)
      vector = DaruLite::Vector.new([1, 2, 3, 4, 5] * 20, index: date_index)

      expect(vector.class).to eq(DaruLite::Vector)
      expect(vector['2012-2-3']).to eq(3)
    end
  end

  context "#[]" do
    let(:hourly_index) do
      DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4), end: DateTime.new(2012, 4, 7), freq: 'H'
      )
    end

    # Every element is 23, so the slice examples below only verify which
    # index labels get selected, not the values themselves.
    let(:vector) { DaruLite::Vector.new([23] * hourly_index.size, index: hourly_index) }

    # Builds the Vector a slice is expected to equal: one 23 per entry of
    # +index+, labelled by +index+.
    def all_twenty_threes(index)
      DaruLite::Vector.new([23] * index.size, index: index)
    end

    it "returns the element when complete date" do
      expect(vector['2012-4-4 22:00:00']).to eq(23)
    end

    it "accepts DateTime object for [] argument" do
      expect(vector[DateTime.new(2012, 4, 4, 22)]).to eq(23)
    end

    it "returns slice when partial date" do
      expected_index = DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4), periods: 24, freq: 'H'
      )

      expect(vector['2012-4-4']).to eq(all_twenty_threes(expected_index))
    end

    it "returns a slice when range" do
      expected_index = DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4), end: DateTime.new(2012, 4, 5, 23), freq: 'H'
      )

      expect(vector['2012-4-4'..'2012-4-5']).to eq(all_twenty_threes(expected_index))
    end

    it "returns a slice when numeric range" do
      expected_index = DaruLite::DateTimeIndex.date_range(
        start: DateTime.new(2012, 4, 4), periods: 20, freq: 'H'
      )

      expect(vector[0..19]).to eq(all_twenty_threes(expected_index))
    end

    it "returns the element when number" do
      expect(vector[32]).to eq(23)
    end
  end

  context "#[]=" do
    it "assigns a single element when index complete" do
      daily_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 5, freq: 'D')
      vector = DaruLite::Vector.new([1, 2, 3, 4, 5], index: daily_index)

      vector['2012-1-4'] = 666

      expect(vector).to eq(DaruLite::Vector.new([1, 2, 3, 666, 5], index: daily_index))
    end

    it "assigns single element when specified a number for indexing" do
      daily_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 5, freq: 'D')
      vector = DaruLite::Vector.new([1, 2, 3, 4, 5], index: daily_index)

      vector[2] = 666

      expect(vector).to eq(DaruLite::Vector.new([1, 2, 666, 4, 5], index: daily_index))
    end

    it "assigns multiple elements when index incomplete" do
      one_to_ten = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
      monthly_index = DaruLite::DateTimeIndex.date_range(start: '2012', periods: 100, freq: 'MB')
      vector = DaruLite::Vector.new(one_to_ten * 10, index: monthly_index)

      vector['2012'] = 666

      # The first 12 entries are expected to be overwritten; the remaining
      # 88 keep their original 1..10 cycle, which resumes at 3.
      expected_values = [666] * 12 + [3, 4, 5, 6, 7, 8, 9, 10] + one_to_ten * 8
      expect(vector).to eq(DaruLite::Vector.new(expected_values, index: monthly_index))
    end
  end
end
|
80
|
+
|
81
|
+
# Behaviour of DaruLite::DataFrame when both its row index and its vector
# order are DaruLite::DateTimeIndex objects.
describe DaruLite::DataFrame do
  let(:row_index) { DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 100) }
  let(:column_order) do
    DaruLite::DateTimeIndex.new(
      [DateTime.new(2012, 1, 3), DateTime.new(2013, 2, 3), DateTime.new(2012, 3, 3)]
    )
  end
  let(:ints)    { [1, 2, 3, 4, 5] * 20 }
  let(:tripled) { ints.map { |value| value * 3 } }
  let(:strings) { ints.map(&:to_s) }

  # Built eagerly (let!) so the frame exists before each example body runs,
  # exactly as a before-each hook would arrange.
  let!(:frame) do
    DaruLite::DataFrame.new([ints, tripled, strings], index: row_index, order: column_order)
  end

  context "#initialize" do
    it "accepts DateTimeIndex for index and order options" do
      expect(frame.index).to eq(row_index)
      expect(frame['2013-2-3']).to eq(DaruLite::Vector.new(tripled, index: row_index))
    end
  end

  context "#[]" do
    it "returns one Vector when complete index" do
      expect(frame['2012-3-3']).to eq(DaruLite::Vector.new(strings, index: row_index))
    end

    it "returns a Vector when DateTime object specified" do
      expect(frame[DateTime.new(2012, 3, 3)]).to eq(DaruLite::Vector.new(strings, index: row_index))
    end

    it "returns DataFrame when incomplete index" do
      # Only the two vectors labelled with a 2012 date are expected back.
      order_2012 = DaruLite::DateTimeIndex.new([DateTime.new(2012, 1, 3), DateTime.new(2012, 3, 3)])
      answer = DaruLite::DataFrame.new([ints, strings], index: row_index, order: order_2012)

      expect(frame['2012']).to eq(answer)
    end

    it "returns Vector when single index specified as a number" do
      expect(frame[1]).to eq(DaruLite::Vector.new(tripled, index: row_index))
    end
  end

  context "#[]=" do
    it "assigns one Vector when complete index" do
      answer = DaruLite::DataFrame.new([ints, tripled, ints], index: row_index, order: column_order)

      frame['2012-3-3'] = ints

      expect(frame).to eq(answer)
    end

    it "assigns one Vector when index as DateTime object" do
      answer = DaruLite::DataFrame.new([ints, tripled, ints], index: row_index, order: column_order)

      frame[DateTime.new(2012, 3, 3)] = ints

      expect(frame).to eq(answer)
    end

    it "assigns multiple vectors when incomplete index" do
      answer = DaruLite::DataFrame.new([tripled, tripled, tripled], index: row_index, order: column_order)

      frame['2012'] = tripled

      expect(frame).to eq(answer)
    end

    it "assigns Vector when specified position index" do
      answer = DaruLite::DataFrame.new([ints, tripled, ints], index: row_index, order: column_order)

      frame[2] = ints

      expect(frame).to eq(answer)
    end
  end

  context "#row[]" do
    it "returns one row Vector when complete index" do
      expect(frame.row['2012-2-1']).to eq(DaruLite::Vector.new([1, 3, "1"], index: column_order))
    end

    it "returns one row when complete DateTime specified" do
      expect(frame.row[DateTime.new(2012, 2, 1)]).to eq(
        DaruLite::Vector.new([1, 3, "1"], index: column_order)
      )
    end

    it "returns DataFrame when incomplete index" do
      # 0..28 picks 29 elements, matching the 29-period index built below.
      first_29 = 0..28
      february_index = DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 29)
      answer = DaruLite::DataFrame.new(
        [ints[first_29], tripled[first_29], strings[first_29]],
        index: february_index, order: column_order
      )

      expect(frame.row['2012-2']).to eq(answer)
    end

    it "returns one row Vector when position index" do
      expect(frame.row[2]).to eq(DaruLite::Vector.new([3, 9, '3'], index: column_order))
    end
  end

  context "#row[]=" do
    it "assigns one row Vector when complete index" do
      frame.row['2012-2-4'] = [666, 999, 0]

      expect(frame.row['2012-2-4']).to eq(DaruLite::Vector.new([666, 999, 0], index: column_order))
    end

    it "assigns one row Vector when complete index as DateTime" do
      target_day = DateTime.new(2012, 2, 5)

      frame.row[target_day] = [1, 2, 3]

      expect(frame.row[DateTime.new(2012, 2, 5)]).to eq(
        DaruLite::Vector.new([1, 2, 3], index: column_order)
      )
    end

    it "assigns multiple rows when incomplete index" do
      february_index = DaruLite::DateTimeIndex.date_range(start: '2012-2-1', periods: 29)
      answer = DaruLite::DataFrame.new(
        [[666] * 29, [999] * 29, [0] * 29],
        index: february_index, order: column_order
      )

      frame.row['2012-2'] = [666, 999, 0]

      expect(frame.row['2012-2']).to eq(answer)
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
include DaruLite
|
2
|
+
|
3
|
+
# Specs for DaruLite::DateTimeIndexHelper.infer_offset, called with an array
# of DateTime objects.
describe DaruLite::DateTimeIndexHelper do
  describe '.infer_offset' do
    subject(:offset) { described_class.infer_offset(data) }

    context 'when the dataset does not have a regular offset' do
      # Gaps of one minute, then four minutes.
      let(:data) { [0, 1, 5].map { |minute| DateTime.new(2020, 1, 1, 0, minute, 0) } }

      it 'returns nil' do
        expect(offset).to be_nil
      end
    end

    context 'when the dataset matches a defined offset' do
      # Evenly spaced, one minute apart.
      let(:data) { [0, 1, 2].map { |minute| DateTime.new(2020, 1, 1, 0, minute, 0) } }

      it 'returns the matched offset' do
        expect(offset).to be_an_instance_of(DaruLite::Offsets::Minute)
      end
    end

    context 'when the offset is a multiple of seconds' do
      # Evenly spaced, three seconds apart.
      let(:data) { [0, 3, 6].map { |second| DateTime.new(2020, 1, 1, 0, 0, second) } }
      let(:expected_offset) { DaruLite::Offsets::Second.new(3) }

      it 'returns a Second offset' do
        expect(offset).to be_an_instance_of(DaruLite::Offsets::Second)
      end

      it 'has the correct multiplier' do
        expect(offset.freq_string).to eql(expected_offset.freq_string)
      end
    end

    context 'when the offset is less than a second' do
      # DateTime#+ takes a number of days, so each step of 0.00001 day is
      # 0.864 seconds.
      let(:data) do
        [0.00001, 0.00002, 0.00003].map do |day_fraction|
          DateTime.new(2020, 1, 1, 0, 0, 0) + day_fraction
        end
      end

      it 'returns nil' do
        expect(offset).to be_nil
      end
    end
  end
end
|