daru_lite 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.github/workflows/ci.yml +33 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/.rubocop_todo.yml +137 -0
- data/CONTRIBUTING.md +47 -0
- data/Gemfile +2 -0
- data/History.md +4 -0
- data/LICENSE +24 -0
- data/README.md +218 -0
- data/Rakefile +69 -0
- data/ReleasePolicy.md +20 -0
- data/benchmarks/TradeoffData.csv +65 -0
- data/benchmarks/csv_reading.rb +22 -0
- data/benchmarks/dataframe_creation.rb +39 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +32 -0
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/row_access.rb +41 -0
- data/benchmarks/row_assign.rb +36 -0
- data/benchmarks/sorting.rb +51 -0
- data/benchmarks/statistics.rb +28 -0
- data/benchmarks/vector_access.rb +31 -0
- data/benchmarks/vector_assign.rb +42 -0
- data/benchmarks/where_clause.rb +48 -0
- data/benchmarks/where_vs_filter.rb +28 -0
- data/daru_lite.gemspec +55 -0
- data/images/README.md +5 -0
- data/images/con0.png +0 -0
- data/images/con1.png +0 -0
- data/images/init0.png +0 -0
- data/images/init1.png +0 -0
- data/images/man0.png +0 -0
- data/images/man1.png +0 -0
- data/images/man2.png +0 -0
- data/images/man3.png +0 -0
- data/images/man4.png +0 -0
- data/images/man5.png +0 -0
- data/images/man6.png +0 -0
- data/lib/daru_lite/accessors/array_wrapper.rb +109 -0
- data/lib/daru_lite/accessors/dataframe_by_row.rb +25 -0
- data/lib/daru_lite/accessors/mdarray_wrapper.rb +7 -0
- data/lib/daru_lite/category.rb +929 -0
- data/lib/daru_lite/configuration.rb +34 -0
- data/lib/daru_lite/core/group_by.rb +403 -0
- data/lib/daru_lite/core/merge.rb +270 -0
- data/lib/daru_lite/core/query.rb +109 -0
- data/lib/daru_lite/dataframe.rb +3080 -0
- data/lib/daru_lite/date_time/index.rb +569 -0
- data/lib/daru_lite/date_time/offsets.rb +397 -0
- data/lib/daru_lite/exceptions.rb +2 -0
- data/lib/daru_lite/extensions/which_dsl.rb +53 -0
- data/lib/daru_lite/formatters/table.rb +52 -0
- data/lib/daru_lite/helpers/array.rb +53 -0
- data/lib/daru_lite/index/categorical_index.rb +201 -0
- data/lib/daru_lite/index/index.rb +374 -0
- data/lib/daru_lite/index/multi_index.rb +374 -0
- data/lib/daru_lite/io/csv/converters.rb +21 -0
- data/lib/daru_lite/io/io.rb +294 -0
- data/lib/daru_lite/io/sql_data_source.rb +97 -0
- data/lib/daru_lite/iruby/helpers.rb +38 -0
- data/lib/daru_lite/iruby/templates/dataframe.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru_lite/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru_lite/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru_lite/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru_lite/iruby/templates/vector.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi.html.erb +5 -0
- data/lib/daru_lite/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru_lite/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru_lite/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru_lite/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru_lite/maths/arithmetic/dataframe.rb +91 -0
- data/lib/daru_lite/maths/arithmetic/vector.rb +117 -0
- data/lib/daru_lite/maths/statistics/dataframe.rb +202 -0
- data/lib/daru_lite/maths/statistics/vector.rb +1019 -0
- data/lib/daru_lite/monkeys.rb +56 -0
- data/lib/daru_lite/vector.rb +1678 -0
- data/lib/daru_lite/version.rb +3 -0
- data/lib/daru_lite.rb +99 -0
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/array_wrapper_spec.rb +3 -0
- data/spec/category_spec.rb +1741 -0
- data/spec/core/group_by_spec.rb +655 -0
- data/spec/core/merge_spec.rb +179 -0
- data/spec/core/query_spec.rb +347 -0
- data/spec/daru_lite_spec.rb +22 -0
- data/spec/dataframe_spec.rb +4330 -0
- data/spec/date_time/data_spec.rb +197 -0
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +588 -0
- data/spec/date_time/offsets_spec.rb +465 -0
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/countries.json +7794 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +100 -0
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +137 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +170 -0
- data/spec/index/index_spec.rb +417 -0
- data/spec/index/multi_index_spec.rb +680 -0
- data/spec/io/io_spec.rb +373 -0
- data/spec/io/sql_data_source_spec.rb +56 -0
- data/spec/iruby/dataframe_spec.rb +170 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +105 -0
- data/spec/maths/arithmetic/dataframe_spec.rb +148 -0
- data/spec/maths/arithmetic/vector_spec.rb +165 -0
- data/spec/maths/statistics/dataframe_spec.rb +178 -0
- data/spec/maths/statistics/vector_spec.rb +756 -0
- data/spec/monkeys_spec.rb +42 -0
- data/spec/shared/vector_display_spec.rb +213 -0
- data/spec/spec_helper.rb +87 -0
- data/spec/support/database_helper.rb +30 -0
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +2293 -0
- metadata +571 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
describe DaruLite::Vector do
|
|
2
|
+
context "#initialize" do
|
|
3
|
+
it "accepts DateTimeIndex in index option" do
|
|
4
|
+
index = DaruLite::DateTimeIndex.date_range(:start => DateTime.new(2012,2,1), periods: 100)
|
|
5
|
+
vector = DaruLite::Vector.new [1,2,3,4,5]*20, index: index
|
|
6
|
+
|
|
7
|
+
expect(vector.class).to eq(DaruLite::Vector)
|
|
8
|
+
expect(vector['2012-2-3']).to eq(3)
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
context "#[]" do
|
|
13
|
+
before do
|
|
14
|
+
index = DaruLite::DateTimeIndex.date_range(
|
|
15
|
+
:start => DateTime.new(2012,4,4), :end => DateTime.new(2012,4,7), freq: 'H')
|
|
16
|
+
@vector = DaruLite::Vector.new([23]*index.size, index: index)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "returns the element when complete date" do
|
|
20
|
+
expect(@vector['2012-4-4 22:00:00']).to eq(23)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "accepts DateTime object for [] argument" do
|
|
24
|
+
expect(@vector[DateTime.new(2012,4,4,22)]).to eq(23)
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
it "returns slice when partial date" do
|
|
28
|
+
slice_index = DaruLite::DateTimeIndex.date_range(
|
|
29
|
+
:start => DateTime.new(2012,4,4), :periods => 24, freq: 'H')
|
|
30
|
+
expect(@vector['2012-4-4']).to eq(
|
|
31
|
+
DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
it "returns a slice when range" do
|
|
35
|
+
slice_index = DaruLite::DateTimeIndex.date_range(
|
|
36
|
+
:start => DateTime.new(2012,4,4), :end => DateTime.new(2012,4,5,23,), freq: 'H')
|
|
37
|
+
expect(@vector['2012-4-4'..'2012-4-5']).to eq(
|
|
38
|
+
DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it "returns a slice when numeric range" do
|
|
42
|
+
slice_index = DaruLite::DateTimeIndex.date_range(
|
|
43
|
+
:start => DateTime.new(2012,4,4), :periods => 20, :freq => 'H')
|
|
44
|
+
expect(@vector[0..19]).to eq(
|
|
45
|
+
DaruLite::Vector.new([23]*slice_index.size, index: slice_index))
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "returns the element when number" do
|
|
49
|
+
expect(@vector[32]).to eq(23)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
context "#[]=" do
|
|
54
|
+
it "assigns a single element when index complete" do
|
|
55
|
+
index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 5, :freq => 'D')
|
|
56
|
+
vector = DaruLite::Vector.new([1,2,3,4,5], index: index)
|
|
57
|
+
vector['2012-1-4'] = 666
|
|
58
|
+
expect(vector).to eq(DaruLite::Vector.new([1,2,3,666,5], index: index))
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "assigns single element when specified a number for indexing" do
|
|
62
|
+
index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 5, :freq => 'D')
|
|
63
|
+
vector = DaruLite::Vector.new([1,2,3,4,5], index: index)
|
|
64
|
+
|
|
65
|
+
vector[2] = 666
|
|
66
|
+
expect(vector).to eq(
|
|
67
|
+
DaruLite::Vector.new([1,2,666,4,5], index: index))
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
it "assigns multiple elements when index incomplete" do
|
|
71
|
+
index = DaruLite::DateTimeIndex.date_range(:start => '2012', :periods => 100,
|
|
72
|
+
:freq => 'MB')
|
|
73
|
+
vector = DaruLite::Vector.new([1,2,3,4,5,6,7,8,9,10]*10, index: index)
|
|
74
|
+
vector['2012'] = 666
|
|
75
|
+
arr = [666]*12 + [3,4,5,6,7,8,9,10] + [1,2,3,4,5,6,7,8,9,10]*8
|
|
76
|
+
expect(vector).to eq(DaruLite::Vector.new(arr, index: index))
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
describe DaruLite::DataFrame do
|
|
82
|
+
before :each do
|
|
83
|
+
@index = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', periods: 100)
|
|
84
|
+
@order = DaruLite::DateTimeIndex.new([
|
|
85
|
+
DateTime.new(2012,1,3),DateTime.new(2013,2,3),DateTime.new(2012,3,3)])
|
|
86
|
+
@a = [1,2,3,4,5]*20
|
|
87
|
+
@b = @a.map { |e| e*3 }
|
|
88
|
+
@c = @a.map(&:to_s)
|
|
89
|
+
@df = DaruLite::DataFrame.new([@a, @b, @c], index: @index, order: @order)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
context "#initialize" do
|
|
93
|
+
it "accepts DateTimeIndex for index and order options" do
|
|
94
|
+
expect(@df.index).to eq(@index)
|
|
95
|
+
expect(@df['2013-2-3']).to eq(
|
|
96
|
+
DaruLite::Vector.new(@b, index: @index))
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
context "#[]" do
|
|
101
|
+
it "returns one Vector when complete index" do
|
|
102
|
+
expect(@df['2012-3-3']).to eq(DaruLite::Vector.new(@c, index: @index))
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "returns a Vector when DateTime object specified" do
|
|
106
|
+
expect(@df[DateTime.new(2012,3,3)]).to eq(
|
|
107
|
+
DaruLite::Vector.new(@c, index: @index))
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
it "returns DataFrame when incomplete index" do
|
|
111
|
+
answer = DaruLite::DataFrame.new(
|
|
112
|
+
[@a, @c], index: @index, order: DaruLite::DateTimeIndex.new([
|
|
113
|
+
DateTime.new(2012,1,3),DateTime.new(2012,3,3)]))
|
|
114
|
+
expect(@df['2012']).to eq(answer)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
it "returns Vector when single index specified as a number" do
|
|
118
|
+
expect(@df[1]).to eq(DaruLite::Vector.new(@b, index: @index))
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
context "#[]=" do
|
|
123
|
+
it "assigns one Vector when complete index" do
|
|
124
|
+
answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order)
|
|
125
|
+
@df['2012-3-3'] = @a
|
|
126
|
+
expect(@df).to eq(answer)
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
it "assigns one Vector when index as DateTime object" do
|
|
130
|
+
answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order)
|
|
131
|
+
@df[DateTime.new(2012,3,3)] = @a
|
|
132
|
+
expect(@df).to eq(answer)
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
it "assigns multiple vectors when incomplete index" do
|
|
136
|
+
answer = DaruLite::DataFrame.new([@b,@b,@b], index: @index, order: @order)
|
|
137
|
+
@df['2012'] = @b
|
|
138
|
+
expect(@df).to eq(answer)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
it "assigns Vector when specified position index" do
|
|
142
|
+
answer = DaruLite::DataFrame.new([@a, @b, @a], index: @index, order: @order)
|
|
143
|
+
@df[2] = @a
|
|
144
|
+
expect(@df).to eq(answer)
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
148
|
+
context "#row[]" do
|
|
149
|
+
it "returns one row Vector when complete index" do
|
|
150
|
+
expect(@df.row['2012-2-1']).to eq(DaruLite::Vector.new([1,3,"1"], index: @order))
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it "returns one row when complete DateTime specified" do
|
|
154
|
+
expect(@df.row[DateTime.new(2012,2,1)]).to eq(
|
|
155
|
+
DaruLite::Vector.new([1,3,"1"], index: @order))
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
it "returns DataFrame when incomplete index" do
|
|
159
|
+
range = 0..28
|
|
160
|
+
a = @a[range]
|
|
161
|
+
b = @b[range]
|
|
162
|
+
c = @c[range]
|
|
163
|
+
i = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', periods: 29)
|
|
164
|
+
answer = DaruLite::DataFrame.new([a,b,c], index: i, order: @order)
|
|
165
|
+
|
|
166
|
+
expect(@df.row['2012-2']).to eq(answer)
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
it "returns one row Vector when position index" do
|
|
170
|
+
expect(@df.row[2]).to eq(DaruLite::Vector.new([3,9,'3'], index: @order))
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
context "#row[]=" do
|
|
175
|
+
it "assigns one row Vector when complete index" do
|
|
176
|
+
@df.row['2012-2-4'] = [666,999,0]
|
|
177
|
+
expect(@df.row['2012-2-4']).to eq(DaruLite::Vector.new([666,999,0], index: @order))
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
it "assigns one row Vector when complete index as DateTime" do
|
|
181
|
+
@df.row[DateTime.new(2012,2,5)] = [1,2,3]
|
|
182
|
+
expect(@df.row[DateTime.new(2012,2,5)]).to eq(
|
|
183
|
+
DaruLite::Vector.new([1,2,3], index: @order))
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
it "assigns multiple rows when incomplete index" do
|
|
187
|
+
a = [666]*29
|
|
188
|
+
b = [999]*29
|
|
189
|
+
c = [0]*29
|
|
190
|
+
index = DaruLite::DateTimeIndex.date_range(:start => '2012-2-1', :periods => 29)
|
|
191
|
+
answer = DaruLite::DataFrame.new([a,b,c], index: index, order: @order)
|
|
192
|
+
@df.row['2012-2'] = [666,999,0]
|
|
193
|
+
|
|
194
|
+
expect(@df.row['2012-2']).to eq(answer)
|
|
195
|
+
end
|
|
196
|
+
end
|
|
197
|
+
end
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
include DaruLite
|
|
2
|
+
|
|
3
|
+
describe DaruLite::DateTimeIndexHelper do
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
describe '.infer_offset' do
|
|
7
|
+
subject(:offset) { DaruLite::DateTimeIndexHelper.infer_offset(data) }
|
|
8
|
+
|
|
9
|
+
context 'when the dataset does not have a regular offset' do
|
|
10
|
+
let(:data) do
|
|
11
|
+
[
|
|
12
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
|
13
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
|
14
|
+
DateTime.new(2020, 1, 1, 00, 05, 00),
|
|
15
|
+
]
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it 'returns nil' do
|
|
19
|
+
expect(offset).to be_nil
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
context 'when the dataset matches a defined offset' do
|
|
24
|
+
let(:data) do
|
|
25
|
+
[
|
|
26
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
|
27
|
+
DateTime.new(2020, 1, 1, 00, 01, 00),
|
|
28
|
+
DateTime.new(2020, 1, 1, 00, 02, 00),
|
|
29
|
+
]
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it 'returns the matched offset' do
|
|
33
|
+
expect(offset).to be_an_instance_of(DaruLite::Offsets::Minute)
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
context 'when the offset is a multiple of seconds' do
|
|
38
|
+
let(:data) do
|
|
39
|
+
[
|
|
40
|
+
DateTime.new(2020, 1, 1, 00, 00, 00),
|
|
41
|
+
DateTime.new(2020, 1, 1, 00, 00, 03),
|
|
42
|
+
DateTime.new(2020, 1, 1, 00, 00, 06),
|
|
43
|
+
]
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
let(:expected_offset) { DaruLite::Offsets::Second.new(3) }
|
|
47
|
+
|
|
48
|
+
it 'returns a Second offset' do
|
|
49
|
+
expect(offset).to be_an_instance_of(DaruLite::Offsets::Second)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
it 'has the correct multiplier' do
|
|
53
|
+
expect(offset.freq_string).to eql(expected_offset.freq_string)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
context 'when the offset is less than a second' do
|
|
58
|
+
let(:data) do
|
|
59
|
+
[
|
|
60
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00001,
|
|
61
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00002,
|
|
62
|
+
DateTime.new(2020, 1, 1, 00, 00, 00) + 0.00003,
|
|
63
|
+
]
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it 'returns nil' do
|
|
67
|
+
expect(offset).to be_nil
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
end
|