daru 0.1.5 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/ISSUE_TEMPLATE.md +18 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +21 -7
- data/.travis.yml +10 -5
- data/CONTRIBUTING.md +15 -10
- data/History.md +124 -2
- data/README.md +37 -9
- data/ReleasePolicy.md +20 -0
- data/benchmarks/db_loading.rb +34 -0
- data/benchmarks/statistics.rb +6 -6
- data/benchmarks/where_clause.rb +1 -1
- data/benchmarks/where_vs_filter.rb +1 -1
- data/daru.gemspec +17 -41
- data/lib/daru.rb +10 -13
- data/lib/daru/accessors/gsl_wrapper.rb +1 -1
- data/lib/daru/accessors/nmatrix_wrapper.rb +2 -0
- data/lib/daru/category.rb +29 -15
- data/lib/daru/configuration.rb +34 -0
- data/lib/daru/core/group_by.rb +158 -77
- data/lib/daru/core/merge.rb +12 -3
- data/lib/daru/core/query.rb +20 -4
- data/lib/daru/dataframe.rb +692 -118
- data/lib/daru/date_time/index.rb +14 -11
- data/lib/daru/date_time/offsets.rb +9 -1
- data/lib/daru/extensions/which_dsl.rb +55 -0
- data/lib/daru/formatters/table.rb +3 -5
- data/lib/daru/index/categorical_index.rb +4 -4
- data/lib/daru/index/index.rb +131 -42
- data/lib/daru/index/multi_index.rb +118 -10
- data/lib/daru/io/csv/converters.rb +21 -0
- data/lib/daru/io/io.rb +105 -33
- data/lib/daru/io/sql_data_source.rb +10 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +4 -51
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +3 -56
- data/lib/daru/iruby/templates/dataframe_mi_tbody.html.erb +35 -0
- data/lib/daru/iruby/templates/dataframe_mi_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/dataframe_tbody.html.erb +28 -0
- data/lib/daru/iruby/templates/dataframe_thead.html.erb +21 -0
- data/lib/daru/iruby/templates/vector.html.erb +3 -25
- data/lib/daru/iruby/templates/vector_mi.html.erb +3 -34
- data/lib/daru/iruby/templates/vector_mi_tbody.html.erb +26 -0
- data/lib/daru/iruby/templates/vector_mi_thead.html.erb +8 -0
- data/lib/daru/iruby/templates/vector_tbody.html.erb +17 -0
- data/lib/daru/iruby/templates/vector_thead.html.erb +8 -0
- data/lib/daru/maths/arithmetic/vector.rb +38 -2
- data/lib/daru/maths/statistics/dataframe.rb +28 -30
- data/lib/daru/maths/statistics/vector.rb +295 -41
- data/lib/daru/plotting/gruff/dataframe.rb +13 -15
- data/lib/daru/plotting/nyaplot/category.rb +1 -1
- data/lib/daru/plotting/nyaplot/dataframe.rb +15 -4
- data/lib/daru/plotting/nyaplot/vector.rb +1 -2
- data/lib/daru/vector.rb +308 -96
- data/lib/daru/version.rb +1 -1
- data/profile/vector_new.rb +9 -0
- data/spec/accessors/gsl_wrapper_spec.rb +38 -35
- data/spec/accessors/nmatrix_wrapper_spec.rb +25 -22
- data/spec/category_spec.rb +24 -20
- data/spec/core/group_by_spec.rb +238 -4
- data/spec/core/merge_spec.rb +1 -1
- data/spec/core/query_spec.rb +65 -50
- data/spec/daru_spec.rb +22 -0
- data/spec/dataframe_spec.rb +473 -16
- data/spec/date_time/date_time_index_helper_spec.rb +72 -0
- data/spec/date_time/index_spec.rb +34 -16
- data/spec/date_time/offsets_spec.rb +14 -0
- data/spec/extensions/rserve_spec.rb +1 -1
- data/spec/extensions/which_dsl_spec.rb +38 -0
- data/spec/fixtures/boolean_converter_test.csv +5 -0
- data/spec/fixtures/duplicates.csv +32 -0
- data/spec/fixtures/eciresults.html +394 -0
- data/spec/fixtures/empty_rows_test.csv +17 -0
- data/spec/fixtures/macau.html +3691 -0
- data/spec/fixtures/macd_data.csv +150 -0
- data/spec/fixtures/matrix_test.csv +55 -55
- data/spec/fixtures/moneycontrol.html +6812 -0
- data/spec/fixtures/string_converter_test.csv +5 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/fixtures/test_xls_2.xls +0 -0
- data/spec/fixtures/url_test.txt~ +0 -0
- data/spec/fixtures/valid_markup.html +62 -0
- data/spec/fixtures/wiki_climate.html +1243 -0
- data/spec/fixtures/wiki_table_info.html +631 -0
- data/spec/formatters/table_formatter_spec.rb +29 -0
- data/spec/index/categorical_index_spec.rb +33 -33
- data/spec/index/index_spec.rb +160 -41
- data/spec/index/multi_index_spec.rb +143 -33
- data/spec/io/io_spec.rb +246 -2
- data/spec/io/sql_data_source_spec.rb +31 -41
- data/spec/iruby/dataframe_spec.rb +17 -19
- data/spec/iruby/vector_spec.rb +26 -28
- data/spec/maths/arithmetic/dataframe_spec.rb +1 -1
- data/spec/maths/arithmetic/vector_spec.rb +18 -0
- data/spec/maths/statistics/vector_spec.rb +153 -15
- data/spec/plotting/gruff/category_spec.rb +3 -3
- data/spec/plotting/gruff/dataframe_spec.rb +14 -4
- data/spec/plotting/gruff/vector_spec.rb +9 -9
- data/spec/plotting/nyaplot/category_spec.rb +5 -9
- data/spec/plotting/nyaplot/dataframe_spec.rb +95 -47
- data/spec/plotting/nyaplot/vector_spec.rb +5 -11
- data/spec/shared/vector_display_spec.rb +12 -14
- data/spec/spec_helper.rb +30 -7
- data/spec/support/matchers.rb +5 -0
- data/spec/vector_spec.rb +306 -72
- metadata +96 -55
- data/spec/fixtures/stock_data.csv +0 -500
data/spec/core/merge_spec.rb
CHANGED
@@ -112,7 +112,7 @@ describe Daru::DataFrame do
|
|
112
112
|
expect(@left.join(@right, how: :outer, on: [:name])).to eq(answer)
|
113
113
|
end
|
114
114
|
|
115
|
-
it "performs a left outer join"
|
115
|
+
it "performs a left outer join" do
|
116
116
|
answer = Daru::DataFrame.new({
|
117
117
|
:id_1 => [2,3,1,4],
|
118
118
|
:name => ["Monkey", "Ninja", "Pirate", "Spaghetti"],
|
data/spec/core/query_spec.rb
CHANGED
@@ -43,79 +43,79 @@ describe "Arel-like syntax" do
|
|
43
43
|
@comparator = Daru::Vector.new([45,22,1214,55,32,9])
|
44
44
|
@klass = Daru::Core::Query::BoolArray
|
45
45
|
end
|
46
|
-
|
46
|
+
|
47
47
|
context "#eq" do
|
48
48
|
it "accepts scalar value" do
|
49
49
|
expect(@vector.eq(352)).to eq(
|
50
50
|
@klass.new([false,false,false,true,false,false]))
|
51
51
|
end
|
52
|
-
|
52
|
+
|
53
53
|
it "accepts vector and compares corrensponding elements" do
|
54
54
|
expect(@vector.eq(@comparator)).to eq(
|
55
55
|
@klass.new([false,false,true,false,true,false]))
|
56
56
|
end
|
57
57
|
end
|
58
|
-
|
58
|
+
|
59
59
|
context "#not_eq" do
|
60
60
|
it "accepts scalar value" do
|
61
61
|
expect(@vector.not_eq(51)).to eq(
|
62
62
|
@klass.new([true, false, true, true, true, true]))
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
it "accepts vector and compares corrensponding elements" do
|
66
66
|
expect(@vector.not_eq(@comparator)).to eq(
|
67
67
|
@klass.new([true, true, false, true, false, true]))
|
68
68
|
end
|
69
69
|
end
|
70
|
-
|
70
|
+
|
71
71
|
context "#lt" do
|
72
72
|
it "accepts scalar value" do
|
73
73
|
expect(@vector.lt(51)).to eq(
|
74
74
|
@klass.new([true, false, false, false, true, true]))
|
75
75
|
end
|
76
|
-
|
76
|
+
|
77
77
|
it "accepts vector and compares corrensponding elements" do
|
78
78
|
expect(@vector.lt(@comparator)).to eq(
|
79
79
|
@klass.new([true,false,false,false,false,false]))
|
80
80
|
end
|
81
81
|
end
|
82
|
-
|
82
|
+
|
83
83
|
context "#lteq" do
|
84
84
|
it "accepts scalar value" do
|
85
85
|
expect(@vector.lteq(51)).to eq(
|
86
86
|
@klass.new([true, true, false, false, true, true]))
|
87
87
|
end
|
88
|
-
|
88
|
+
|
89
89
|
it "accepts vector and compares corrensponding elements" do
|
90
90
|
expect(@vector.lteq(@comparator)).to eq(
|
91
91
|
@klass.new([true,false,true,false,true,false]))
|
92
92
|
end
|
93
93
|
end
|
94
|
-
|
94
|
+
|
95
95
|
context "#mt" do
|
96
96
|
it "accepts scalar value" do
|
97
97
|
expect(@vector.mt(51)).to eq(
|
98
98
|
@klass.new([false, false, true, true, false, false]))
|
99
99
|
end
|
100
|
-
|
100
|
+
|
101
101
|
it "accepts vector and compares corrensponding elements" do
|
102
102
|
expect(@vector.mt(@comparator)).to eq(
|
103
103
|
@klass.new([false,true,false,true,false,true]))
|
104
104
|
end
|
105
105
|
end
|
106
|
-
|
106
|
+
|
107
107
|
context "#mteq" do
|
108
108
|
it "accepts scalar value" do
|
109
109
|
expect(@vector.mteq(51)).to eq(
|
110
110
|
@klass.new([false, true, true, true, false, false]))
|
111
111
|
end
|
112
|
-
|
112
|
+
|
113
113
|
it "accepts vector and compares corrensponding elements" do
|
114
114
|
expect(@vector.mteq(@comparator)).to eq(
|
115
115
|
@klass.new([false,true,true,true,true,true]))
|
116
116
|
end
|
117
117
|
end
|
118
|
-
|
118
|
+
|
119
119
|
context "#in" do
|
120
120
|
it "checks if any of elements in the arg are present in the vector" do
|
121
121
|
expect(@vector.in([23,55,1,33,32])).to eq(
|
@@ -123,7 +123,7 @@ describe "Arel-like syntax" do
|
|
123
123
|
end
|
124
124
|
end
|
125
125
|
end
|
126
|
-
|
126
|
+
|
127
127
|
describe "categorical type" do
|
128
128
|
let(:dv) { Daru::Vector.new ['e', 'd', 'd', 'x', 'x'],
|
129
129
|
categories: ['a', 'x', 'c', 'd', 'e'], type: :category }
|
@@ -134,99 +134,99 @@ describe "Arel-like syntax" do
|
|
134
134
|
context "#eq" do
|
135
135
|
context "scalar" do
|
136
136
|
subject { dv.eq 'd' }
|
137
|
-
|
137
|
+
|
138
138
|
it { is_expected.to be_a query_bool_class }
|
139
139
|
its(:to_a) { is_expected.to eq [false, true, true, false, false] }
|
140
140
|
end
|
141
141
|
|
142
142
|
context "vector" do
|
143
143
|
subject { dv.eq comp }
|
144
|
-
|
144
|
+
|
145
145
|
it { is_expected.to be_a query_bool_class }
|
146
146
|
its(:to_a) { is_expected.to eq [false, true, false, false, true] }
|
147
147
|
end
|
148
148
|
end
|
149
|
-
|
149
|
+
|
150
150
|
context "#not_eq" do
|
151
151
|
context "scalar" do
|
152
152
|
subject { dv.not_eq 'd' }
|
153
|
-
|
153
|
+
|
154
154
|
it { is_expected.to be_a query_bool_class }
|
155
155
|
its(:to_a) { is_expected.to eq [true, false, false, true, true] }
|
156
156
|
end
|
157
|
-
|
157
|
+
|
158
158
|
context "vector" do
|
159
159
|
subject { dv.not_eq comp }
|
160
|
-
|
160
|
+
|
161
161
|
it { is_expected.to be_a query_bool_class }
|
162
162
|
its(:to_a) { is_expected.to eq [true, false, true, true, false] }
|
163
163
|
end
|
164
164
|
end
|
165
|
-
|
165
|
+
|
166
166
|
context "#lt" do
|
167
167
|
context "scalar" do
|
168
168
|
subject { dv.lt 'd' }
|
169
|
-
|
169
|
+
|
170
170
|
it { is_expected.to be_a query_bool_class }
|
171
171
|
its(:to_a) { is_expected.to eq [false, false, false, true, true] }
|
172
172
|
end
|
173
|
-
|
173
|
+
|
174
174
|
context "vector" do
|
175
175
|
subject { dv.lt comp }
|
176
|
-
|
176
|
+
|
177
177
|
it { is_expected.to be_a query_bool_class }
|
178
178
|
its(:to_a) { is_expected.to eq [false, false, false, true, false] }
|
179
179
|
end
|
180
180
|
end
|
181
|
-
|
181
|
+
|
182
182
|
context "#lteq" do
|
183
183
|
context "scalar" do
|
184
184
|
subject { dv.lteq 'd' }
|
185
|
-
|
185
|
+
|
186
186
|
it { is_expected.to be_a query_bool_class }
|
187
187
|
its(:to_a) { is_expected.to eq [false, true, true, true, true] }
|
188
188
|
end
|
189
|
-
|
189
|
+
|
190
190
|
context "vector" do
|
191
191
|
subject { dv.lteq comp }
|
192
|
-
|
192
|
+
|
193
193
|
it { is_expected.to be_a query_bool_class }
|
194
194
|
its(:to_a) { is_expected.to eq [false, true, false, true, true] }
|
195
195
|
end
|
196
196
|
end
|
197
|
-
|
197
|
+
|
198
198
|
context "#mt" do
|
199
199
|
context "scalar" do
|
200
200
|
subject { dv.mt 'd' }
|
201
|
-
|
201
|
+
|
202
202
|
it { is_expected.to be_a query_bool_class }
|
203
203
|
its(:to_a) { is_expected.to eq [true, false, false, false, false] }
|
204
204
|
end
|
205
|
-
|
205
|
+
|
206
206
|
context "vector" do
|
207
207
|
subject { dv.mt comp }
|
208
|
-
|
208
|
+
|
209
209
|
it { is_expected.to be_a query_bool_class }
|
210
210
|
its(:to_a) { is_expected.to eq [true, false, true, false, false] }
|
211
211
|
end
|
212
212
|
end
|
213
|
-
|
213
|
+
|
214
214
|
context "#mteq" do
|
215
215
|
context "scalar" do
|
216
216
|
subject { dv.mteq 'd' }
|
217
|
-
|
217
|
+
|
218
218
|
it { is_expected.to be_a query_bool_class }
|
219
219
|
its(:to_a) { is_expected.to eq [true, true, true, false, false] }
|
220
220
|
end
|
221
|
-
|
221
|
+
|
222
222
|
context "vector" do
|
223
223
|
subject { dv.mteq comp }
|
224
|
-
|
224
|
+
|
225
225
|
it { is_expected.to be_a query_bool_class }
|
226
226
|
its(:to_a) { is_expected.to eq [true, true, true, false, true] }
|
227
227
|
end
|
228
228
|
end
|
229
|
-
|
229
|
+
|
230
230
|
# context "#in" do
|
231
231
|
# subject { dv.in ['b', 'd'] }
|
232
232
|
# it { is_expected.to be_a query_bool_class }
|
@@ -240,9 +240,9 @@ describe "Arel-like syntax" do
|
|
240
240
|
context Daru::DataFrame do
|
241
241
|
before do
|
242
242
|
@df = Daru::DataFrame.new({
|
243
|
-
number: [1,2,3,4,5,6],
|
244
|
-
sym: [:one, :two, :three, :four, :five, :six],
|
245
|
-
names: ['sameer', 'john', 'james', 'omisha', 'priyanka', 'shravan']
|
243
|
+
number: [1,2,3,4,5,6,Float::NAN],
|
244
|
+
sym: [:one, :two, :three, :four, :five, :six, :seven],
|
245
|
+
names: ['sameer', 'john', 'james', 'omisha', 'priyanka', 'shravan',nil]
|
246
246
|
})
|
247
247
|
end
|
248
248
|
|
@@ -267,6 +267,11 @@ describe "Arel-like syntax" do
|
|
267
267
|
).to eq(answer)
|
268
268
|
end
|
269
269
|
|
270
|
+
let(:dv) { Daru::Vector.new([1,11,32,Float::NAN,nil]) }
|
271
|
+
it "handles empty data" do
|
272
|
+
expect(dv.where(dv.lt(14))).to eq(Daru::Vector.new([1,11]))
|
273
|
+
end
|
274
|
+
|
270
275
|
it "does not give SystemStackError" do
|
271
276
|
v = Daru::Vector.new [1]*300_000
|
272
277
|
expect { v.where v.eq(1) }.not_to raise_error
|
@@ -276,45 +281,45 @@ describe "Arel-like syntax" do
|
|
276
281
|
context Daru::Vector do
|
277
282
|
context "non-categorical type" do
|
278
283
|
before do
|
279
|
-
@vector = Daru::Vector.new([2,5,1,22,51,4])
|
284
|
+
@vector = Daru::Vector.new([2,5,1,22,51,4,nil,Float::NAN])
|
280
285
|
end
|
281
|
-
|
286
|
+
|
282
287
|
it "accepts a simple single statement" do
|
283
288
|
expect(@vector.where(@vector.lt(10))).to eq(
|
284
289
|
Daru::Vector.new([2,5,1,4], index: Daru::Index.new([0,1,2,5])))
|
285
290
|
end
|
286
|
-
|
291
|
+
|
287
292
|
it "accepts somewhat complex operator chaining" do
|
288
293
|
expect(@vector.where((@vector.lt(6) | @vector.eq(51)))).to eq(
|
289
294
|
Daru::Vector.new([2,5,1,51,4], index: Daru::Index.new([0,1,2,4,5])))
|
290
295
|
end
|
291
296
|
end
|
292
|
-
|
297
|
+
|
293
298
|
context "categorical type" do
|
294
299
|
let(:dv) { Daru::Vector.new ['a', 'c', 'x', 'x', 'c'],
|
295
300
|
categories: ['a', 'x', 'c'], type: :category }
|
296
|
-
|
301
|
+
|
297
302
|
context "simple single statement" do
|
298
303
|
subject { dv.where(dv.lt('x')) }
|
299
|
-
|
304
|
+
|
300
305
|
it { is_expected.to be_a Daru::Vector }
|
301
306
|
its(:type) { is_expected.to eq :category }
|
302
307
|
its(:to_a) { is_expected.to eq ['a'] }
|
303
308
|
its(:'index.to_a') { is_expected.to eq [0] }
|
304
309
|
end
|
305
|
-
|
310
|
+
|
306
311
|
context "complex operator chaining" do
|
307
312
|
subject { dv.where((dv.lt('x') | dv.eq('c'))) }
|
308
|
-
|
313
|
+
|
309
314
|
it { is_expected.to be_a Daru::Vector }
|
310
315
|
its(:type) { is_expected.to eq :category }
|
311
316
|
its(:to_a) { is_expected.to eq ['a', 'c', 'c'] }
|
312
317
|
its(:'index.to_a') { is_expected.to eq [0, 1, 4] }
|
313
318
|
end
|
314
|
-
|
319
|
+
|
315
320
|
context "preserve categories" do
|
316
321
|
subject { dv.where((dv.lt('x') | dv.eq('c'))) }
|
317
|
-
|
322
|
+
|
318
323
|
it { is_expected.to be_a Daru::Vector }
|
319
324
|
its(:type) { is_expected.to eq :category }
|
320
325
|
its(:to_a) { is_expected.to eq ['a', 'c', 'c'] }
|
@@ -329,4 +334,14 @@ describe "Arel-like syntax" do
|
|
329
334
|
end
|
330
335
|
end
|
331
336
|
end
|
337
|
+
|
338
|
+
describe "apply_where" do
|
339
|
+
context "matches regexp with block input" do
|
340
|
+
subject { dv.apply_where(dv.match /weeks/) { |x| "#{x.split.first.to_i * 7} days" } }
|
341
|
+
|
342
|
+
let(:dv) { Daru::Vector.new ['3 days', '5 weeks', '2 weeks'] }
|
343
|
+
|
344
|
+
it { is_expected.to eq(Daru::Vector.new ['3 days', '35 days', '14 days']) }
|
345
|
+
end
|
346
|
+
end
|
332
347
|
end
|
data/spec/daru_spec.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe '#error' do
|
4
|
+
context 'by default' do
|
5
|
+
it { expect { Daru.error('test') }.to output("test\n").to_stderr_from_any_process }
|
6
|
+
end
|
7
|
+
|
8
|
+
context 'when set to nil' do
|
9
|
+
before { Daru.error_stream = nil }
|
10
|
+
it { expect { Daru.error('test') }.not_to output('test').to_stderr_from_any_process }
|
11
|
+
end
|
12
|
+
|
13
|
+
context 'when set to instance of custom class' do
|
14
|
+
let(:custom_stream) { double(puts: nil) }
|
15
|
+
before { Daru.error_stream = custom_stream }
|
16
|
+
|
17
|
+
it 'calls puts' do
|
18
|
+
expect { Daru.error('test') }.not_to output('test').to_stderr_from_any_process
|
19
|
+
expect(custom_stream).to have_received(:puts).with('test')
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
data/spec/dataframe_spec.rb
CHANGED
@@ -145,8 +145,15 @@ describe Daru::DataFrame do
|
|
145
145
|
end
|
146
146
|
|
147
147
|
context "#initialize" do
|
148
|
+
|
149
|
+
it "initializes an empty DataFrame with no arguments" do
|
150
|
+
df = Daru::DataFrame.new
|
151
|
+
expect(df.nrows).to eq(0)
|
152
|
+
expect(df.ncols).to eq(0)
|
153
|
+
end
|
154
|
+
|
148
155
|
context Daru::Index do
|
149
|
-
it "initializes an empty DataFrame" do
|
156
|
+
it "initializes an empty DataFrame with empty source arg" do
|
150
157
|
df = Daru::DataFrame.new({}, order: [:a, :b])
|
151
158
|
|
152
159
|
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
@@ -164,6 +171,13 @@ describe Daru::DataFrame do
|
|
164
171
|
expect(df.a) .to eq([1,2,3,4,5].dv(:a, df.index))
|
165
172
|
end
|
166
173
|
|
174
|
+
it "initializes from a Hash and preserves default order" do
|
175
|
+
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]},
|
176
|
+
index: [:one, :two, :three, :four, :five])
|
177
|
+
|
178
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
179
|
+
end
|
180
|
+
|
167
181
|
it "initializes from a Hash of Vectors" do
|
168
182
|
va = Daru::Vector.new([1,2,3,4,5], index: [:one, :two, :three, :four, :five])
|
169
183
|
vb = Daru::Vector.new([11,12,13,14,15], index: [:one, :two, :three, :four, :five])
|
@@ -221,7 +235,7 @@ describe Daru::DataFrame do
|
|
221
235
|
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5]})
|
222
236
|
|
223
237
|
expect(df.index) .to eq(Daru::Index.new [0,1,2,3,4])
|
224
|
-
expect(df.vectors).to eq(Daru::Index.new [:a, :b])
|
238
|
+
expect(df.vectors).to eq(Daru::Index.new [:b, :a])
|
225
239
|
end
|
226
240
|
|
227
241
|
it "aligns indexes properly" do
|
@@ -329,6 +343,14 @@ describe Daru::DataFrame do
|
|
329
343
|
expect(df[:a]) .to eq(Daru::Vector.new([1,2,3,4,5]))
|
330
344
|
end
|
331
345
|
|
346
|
+
it "allows creation of dataframe with a default order" do
|
347
|
+
arr_of_arrs_df = Daru::DataFrame.new([[1,2,3], [4,5,6], [7,8,9]])
|
348
|
+
arr_of_vectors_df = Daru::DataFrame.new([Daru::Vector.new([1,2,3]), Daru::Vector.new([4,5,6]), Daru::Vector.new([7,8,9])])
|
349
|
+
|
350
|
+
expect(arr_of_arrs_df.vectors.to_a).to eq([0,1,2])
|
351
|
+
expect(arr_of_vectors_df.vectors.to_a).to eq([0,1,2])
|
352
|
+
end
|
353
|
+
|
332
354
|
it "raises error for incomplete DataFrame index" do
|
333
355
|
expect {
|
334
356
|
df = Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
@@ -516,12 +538,31 @@ describe Daru::DataFrame do
|
|
516
538
|
index: [:one, :two, :three, :four, :five]))
|
517
539
|
end
|
518
540
|
|
541
|
+
it "assigns new vector with default length if given just a value" do
|
542
|
+
@df[:d] = 1.0
|
543
|
+
expect(@df[:d]).to eq(Daru::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
|
544
|
+
index: [:one, :two, :three, :four, :five], name: :d))
|
545
|
+
end
|
546
|
+
|
547
|
+
it "updates vector with default length if given just a value" do
|
548
|
+
@df[:c] = 1.0
|
549
|
+
expect(@df[:c]).to eq(Daru::Vector.new([1.0, 1.0, 1.0, 1.0, 1.0],
|
550
|
+
index: [:one, :two, :three, :four, :five], name: :c))
|
551
|
+
end
|
552
|
+
|
519
553
|
it "appends an Array as a Daru::Vector" do
|
520
554
|
@df[:d] = [69,99,108,85,49]
|
521
555
|
|
522
556
|
expect(@df.d.class).to eq(Daru::Vector)
|
523
557
|
end
|
524
558
|
|
559
|
+
it "appends an arbitrary enumerable as a Daru::Vector" do
|
560
|
+
@df[:d] = Set.new([69,99,108,85,49])
|
561
|
+
|
562
|
+
expect(@df[:d]).to eq(Daru::Vector.new([69, 99, 108, 85, 49],
|
563
|
+
index: [:one, :two, :three, :four, :five], name: :c))
|
564
|
+
end
|
565
|
+
|
525
566
|
it "replaces an already present vector" do
|
526
567
|
@df[:a] = [69,99,108,85,49].dv(nil, [:one, :two, :three, :four, :five])
|
527
568
|
|
@@ -697,6 +738,47 @@ describe Daru::DataFrame do
|
|
697
738
|
}
|
698
739
|
end
|
699
740
|
|
741
|
+
context "#insert_vector" do
|
742
|
+
subject(:data_frame) {
|
743
|
+
Daru::DataFrame.new({b: [11,12,13,14,15], a: [1,2,3,4,5],
|
744
|
+
c: [11,22,33,44,55]}, order: [:a, :b, :c],
|
745
|
+
index: [:one, :two, :three, :four, :five])
|
746
|
+
}
|
747
|
+
|
748
|
+
it "insert a new vector at the desired slot" do
|
749
|
+
df = Daru::DataFrame.new({
|
750
|
+
a: [1,2,3,4,5],
|
751
|
+
d: [710, 720, 730, 740, 750],
|
752
|
+
b: [11, 12, 13, 14, 15],
|
753
|
+
c: [11,22,33,44,55]}, order: [:a, :d, :b, :c],
|
754
|
+
index: [:one, :two, :three, :four, :five]
|
755
|
+
)
|
756
|
+
data_frame.insert_vector 1, :d, [710, 720, 730, 740, 750]
|
757
|
+
expect(subject).to eq df
|
758
|
+
end
|
759
|
+
|
760
|
+
it "raises error for data array being too big" do
|
761
|
+
expect {
|
762
|
+
source = (1..8).to_a
|
763
|
+
data_frame.insert_vector 1, :d, source
|
764
|
+
}.to raise_error(IndexError)
|
765
|
+
end
|
766
|
+
|
767
|
+
it "raises error for invalid index value" do
|
768
|
+
expect {
|
769
|
+
source = (1..5).to_a
|
770
|
+
data_frame.insert_vector 4, :d, source
|
771
|
+
}.to raise_error(ArgumentError)
|
772
|
+
end
|
773
|
+
|
774
|
+
it "raises error for invalid source type" do
|
775
|
+
expect {
|
776
|
+
source = 14
|
777
|
+
data_frame.insert_vector 3, :d, source
|
778
|
+
}.to raise_error(ArgumentError)
|
779
|
+
end
|
780
|
+
end
|
781
|
+
|
700
782
|
context "#row[]=" do
|
701
783
|
context Daru::Index do
|
702
784
|
before :each do
|
@@ -1546,6 +1628,22 @@ describe Daru::DataFrame do
|
|
1546
1628
|
}
|
1547
1629
|
end
|
1548
1630
|
|
1631
|
+
context 'with mulitiindex DF' do
|
1632
|
+
subject(:data_frame) {
|
1633
|
+
Daru::DataFrame.new({b: [11,12,13], a: [1,2,3],
|
1634
|
+
c: [11,22,33]}, order: [:a, :b, :c],
|
1635
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four]]))
|
1636
|
+
}
|
1637
|
+
|
1638
|
+
before { data_frame.add_row [100,200,300], [:two, :five] }
|
1639
|
+
|
1640
|
+
it { is_expected.to eq(Daru::DataFrame.new({
|
1641
|
+
b: [11,12,13,200], a: [1,2,3,100],
|
1642
|
+
c: [11,22,33,300]}, order: [:a, :b, :c],
|
1643
|
+
index: Daru::MultiIndex.from_tuples([[:one, :two], [:one, :three], [:two, :four], [:two, :five]])))
|
1644
|
+
}
|
1645
|
+
end
|
1646
|
+
|
1549
1647
|
it "allows adding rows after making empty DF by specfying only order" do
|
1550
1648
|
df = Daru::DataFrame.new({}, order: [:a, :b, :c])
|
1551
1649
|
df.add_row [1,2,3]
|
@@ -1573,6 +1671,13 @@ describe Daru::DataFrame do
|
|
1573
1671
|
it 'has synonym' do
|
1574
1672
|
expect(@data_frame.first(2)).to eq(@data_frame.head(2))
|
1575
1673
|
end
|
1674
|
+
|
1675
|
+
it 'works on DateTime indexes' do
|
1676
|
+
idx = Daru::DateTimeIndex.new(['2017-01-01', '2017-02-01', '2017-03-01'])
|
1677
|
+
df = Daru::DataFrame.new({col1: ['a', 'b', 'c']}, index: idx)
|
1678
|
+
first = Daru::DataFrame.new({col1: ['a']}, index: Daru::DateTimeIndex.new(['2017-01-01']))
|
1679
|
+
expect(df.head(1)).to eq(first)
|
1680
|
+
end
|
1576
1681
|
end
|
1577
1682
|
|
1578
1683
|
context "#last" do
|
@@ -1756,6 +1861,59 @@ describe Daru::DataFrame do
|
|
1756
1861
|
end
|
1757
1862
|
end
|
1758
1863
|
|
1864
|
+
describe 'uniq' do
|
1865
|
+
let(:df) do
|
1866
|
+
Daru::DataFrame.from_csv 'spec/fixtures/duplicates.csv'
|
1867
|
+
end
|
1868
|
+
|
1869
|
+
context 'with no args' do
|
1870
|
+
it do
|
1871
|
+
result = df.uniq
|
1872
|
+
expect(result.shape.first).to eq 30
|
1873
|
+
end
|
1874
|
+
end
|
1875
|
+
|
1876
|
+
context 'given a vector' do
|
1877
|
+
it do
|
1878
|
+
result = df.uniq("color")
|
1879
|
+
expect(result.shape.first).to eq 2
|
1880
|
+
end
|
1881
|
+
end
|
1882
|
+
|
1883
|
+
context 'given an array of vectors' do
|
1884
|
+
it do
|
1885
|
+
result = df.uniq("color", "director_name")
|
1886
|
+
expect(result.shape.first).to eq 29
|
1887
|
+
end
|
1888
|
+
end
|
1889
|
+
end
|
1890
|
+
|
1891
|
+
context '#rolling_fillna!' do
|
1892
|
+
subject do
|
1893
|
+
Daru::DataFrame.new({
|
1894
|
+
a: [1, 2, 3, nil, Float::NAN, nil, 1, 7],
|
1895
|
+
b: [:a, :b, nil, Float::NAN, nil, 3, 5, nil],
|
1896
|
+
c: ['a', Float::NAN, 3, 4, 3, 5, nil, 7]
|
1897
|
+
})
|
1898
|
+
end
|
1899
|
+
|
1900
|
+
context 'rolling_fillna! forwards' do
|
1901
|
+
before { subject.rolling_fillna!(:forward) }
|
1902
|
+
it { expect(subject.rolling_fillna!(:forward)).to eq(subject) }
|
1903
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 3, 3, 3, 1, 7] }
|
1904
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, :b, :b, :b, 3, 5, 5] }
|
1905
|
+
its(:'c.to_a') { is_expected.to eq ['a', 'a', 3, 4, 3, 5, 5, 7] }
|
1906
|
+
end
|
1907
|
+
|
1908
|
+
context 'rolling_fillna! backwards' do
|
1909
|
+
before { subject.rolling_fillna!(:backward) }
|
1910
|
+
it { expect(subject.rolling_fillna!(:backward)).to eq(subject) }
|
1911
|
+
its(:'a.to_a') { is_expected.to eq [1, 2, 3, 1, 1, 1, 1, 7] }
|
1912
|
+
its(:'b.to_a') { is_expected.to eq [:a, :b, 3, 3, 3, 3, 5, 0] }
|
1913
|
+
its(:'c.to_a') { is_expected.to eq ['a', 3, 3, 4, 3, 5, 7, 7] }
|
1914
|
+
end
|
1915
|
+
end
|
1916
|
+
|
1759
1917
|
context "#clone" do
|
1760
1918
|
it "returns a view of the whole dataframe" do
|
1761
1919
|
cloned = @data_frame.clone
|
@@ -2603,6 +2761,26 @@ describe Daru::DataFrame do
|
|
2603
2761
|
end
|
2604
2762
|
end
|
2605
2763
|
|
2764
|
+
context "#rename_vectors!" do
|
2765
|
+
before do
|
2766
|
+
@df = Daru::DataFrame.new({
|
2767
|
+
a: [1,2,3,4,5],
|
2768
|
+
b: [11,22,33,44,55],
|
2769
|
+
c: %w(a b c d e)
|
2770
|
+
})
|
2771
|
+
end
|
2772
|
+
|
2773
|
+
it "returns self as modified dataframe" do
|
2774
|
+
expect(@df.rename_vectors!(:a => :alpha)).to eq(@df)
|
2775
|
+
end
|
2776
|
+
|
2777
|
+
it "re-uses rename_vectors method" do
|
2778
|
+
name_map = { :a => :alpha, :c => :gamma }
|
2779
|
+
expect(@df).to receive(:rename_vectors).with(name_map)
|
2780
|
+
@df.rename_vectors! name_map
|
2781
|
+
end
|
2782
|
+
end
|
2783
|
+
|
2606
2784
|
context "#rename_vectors" do
|
2607
2785
|
before do
|
2608
2786
|
@df = Daru::DataFrame.new({
|
@@ -2612,6 +2790,10 @@ describe Daru::DataFrame do
|
|
2612
2790
|
})
|
2613
2791
|
end
|
2614
2792
|
|
2793
|
+
it "returns Daru::Index" do
|
2794
|
+
expect(@df.rename_vectors(:a => :alpha)).to be_kind_of(Daru::Index)
|
2795
|
+
end
|
2796
|
+
|
2615
2797
|
it "renames vectors using a hash map" do
|
2616
2798
|
@df.rename_vectors :a => :alpha, :c => :gamma
|
2617
2799
|
expect(@df.vectors.to_a).to eq([:alpha, :b, :gamma])
|
@@ -2667,6 +2849,11 @@ describe Daru::DataFrame do
|
|
2667
2849
|
:a => [1,2,3,4,5]
|
2668
2850
|
}, order: [:b, 'a', :a]))
|
2669
2851
|
end
|
2852
|
+
|
2853
|
+
it 'raises ArgumentError if argument was not an index' do
|
2854
|
+
df = Daru::DataFrame.new([])
|
2855
|
+
expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
|
2856
|
+
end
|
2670
2857
|
end
|
2671
2858
|
|
2672
2859
|
context "#to_matrix" do
|
@@ -3050,9 +3237,34 @@ describe Daru::DataFrame do
|
|
3050
3237
|
end
|
3051
3238
|
|
3052
3239
|
context "#summary" do
|
3053
|
-
|
3054
|
-
|
3055
|
-
|
3240
|
+
subject { df.summary }
|
3241
|
+
|
3242
|
+
context "DataFrame" do
|
3243
|
+
let(:df) { Daru::DataFrame.new({a: [1,2,5], b: [1,2,"string"]}, order: [:a, :b], index: [:one, :two, :three], name: 'frame') }
|
3244
|
+
it { is_expected.to eq %Q{
|
3245
|
+
|= frame
|
3246
|
+
| Number of rows: 3
|
3247
|
+
| Element:[a]
|
3248
|
+
| == a
|
3249
|
+
| n :3
|
3250
|
+
| non-missing:3
|
3251
|
+
| median: 2
|
3252
|
+
| mean: 2.6667
|
3253
|
+
| std.dev.: 2.0817
|
3254
|
+
| std.err.: 1.2019
|
3255
|
+
| skew: 0.2874
|
3256
|
+
| kurtosis: -2.3333
|
3257
|
+
| Element:[b]
|
3258
|
+
| == b
|
3259
|
+
| n :3
|
3260
|
+
| non-missing:3
|
3261
|
+
| factors: 1,2,string
|
3262
|
+
| mode: 1,2,string
|
3263
|
+
| Distribution
|
3264
|
+
| 1 1 100.00%
|
3265
|
+
| 2 1 100.00%
|
3266
|
+
| string 1 100.00%
|
3267
|
+
}.unindent }
|
3056
3268
|
end
|
3057
3269
|
end
|
3058
3270
|
|
@@ -3101,6 +3313,15 @@ describe Daru::DataFrame do
|
|
3101
3313
|
it { expect(subject['a'].to_a).to eq [1, 2, 3] }
|
3102
3314
|
it { expect(subject[:b].to_a).to eq [4, 5, 6] }
|
3103
3315
|
end
|
3316
|
+
|
3317
|
+
context "preserves indices for dataframes with same index" do
|
3318
|
+
let(:index) { ['one','two','three'] }
|
3319
|
+
let(:df1) { Daru::DataFrame.new({ 'a' => [1, 2, 3], 'b' => [3, 4, 5] }, index: index) }
|
3320
|
+
let(:df2) { Daru::DataFrame.new({ 'c' => [4, 5, 6], 'd' => [7, 8, 9] }, index: index) }
|
3321
|
+
subject { df1.merge df2 }
|
3322
|
+
|
3323
|
+
its(:index) { is_expected.to eq Daru::Index.new(index) }
|
3324
|
+
end
|
3104
3325
|
end
|
3105
3326
|
|
3106
3327
|
context "#vector_by_calculation" do
|
@@ -3115,25 +3336,41 @@ describe Daru::DataFrame do
|
|
3115
3336
|
end
|
3116
3337
|
end
|
3117
3338
|
|
3339
|
+
context "group_by" do
|
3340
|
+
context "on a single row DataFrame" do
|
3341
|
+
let(:df){ Daru::DataFrame.new(city: %w[Kyiv], year: [2015], value: [1]) }
|
3342
|
+
it "returns a groupby object" do
|
3343
|
+
expect(df.group_by([:city])).to be_a(Daru::Core::GroupBy)
|
3344
|
+
end
|
3345
|
+
it "has the correct index" do
|
3346
|
+
expect(df.group_by([:city]).groups).to eq({["Kyiv"]=>[0]})
|
3347
|
+
end
|
3348
|
+
end
|
3349
|
+
end
|
3350
|
+
|
3118
3351
|
context "#vector_sum" do
|
3119
3352
|
before do
|
3120
|
-
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil]
|
3121
|
-
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30]
|
3122
|
-
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2]
|
3123
|
-
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3]
|
3353
|
+
a1 = Daru::Vector.new [1, 2, 3, 4, 5, nil, nil]
|
3354
|
+
a2 = Daru::Vector.new [10, 10, 20, 20, 20, 30, nil]
|
3355
|
+
b1 = Daru::Vector.new [nil, 1, 1, 1, 1, 2, nil]
|
3356
|
+
b2 = Daru::Vector.new [2, 2, 2, nil, 2, 3, nil]
|
3124
3357
|
@df = Daru::DataFrame.new({ :a1 => a1, :a2 => a2, :b1 => b1, :b2 => b2 })
|
3125
3358
|
end
|
3126
3359
|
|
3127
3360
|
it "calculates complete vector sum" do
|
3128
|
-
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil])
|
3361
|
+
expect(@df.vector_sum).to eq(Daru::Vector.new [nil, 15, 26, nil, 28, nil, nil])
|
3362
|
+
end
|
3363
|
+
|
3364
|
+
it "ignores nils if skipnil is true" do
|
3365
|
+
expect(@df.vector_sum skipnil: true).to eq(Daru::Vector.new [13, 15, 26, 25, 28, 35, 0])
|
3129
3366
|
end
|
3130
3367
|
|
3131
3368
|
it "calculates partial vector sum" do
|
3132
3369
|
a = @df.vector_sum([:a1, :a2])
|
3133
3370
|
b = @df.vector_sum([:b1, :b2])
|
3134
3371
|
|
3135
|
-
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil])
|
3136
|
-
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5])
|
3372
|
+
expect(a).to eq(Daru::Vector.new [11, 12, 23, 24, 25, nil, nil])
|
3373
|
+
expect(b).to eq(Daru::Vector.new [nil, 3, 3, nil, 3, 5, nil])
|
3137
3374
|
end
|
3138
3375
|
end
|
3139
3376
|
|
@@ -3321,7 +3558,8 @@ describe Daru::DataFrame do
|
|
3321
3558
|
ev_b = Daru::Vector.new [1, 1, 0]
|
3322
3559
|
ev_c = Daru::Vector.new [0, 1, 1]
|
3323
3560
|
df2 = Daru::DataFrame.new({
|
3324
|
-
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c }
|
3561
|
+
:_id => ev_id, 'a' => ev_a, 'b' => ev_b, 'c' => ev_c },
|
3562
|
+
order: ['a', 'b', 'c', :_id])
|
3325
3563
|
|
3326
3564
|
expect(df2).to eq(df)
|
3327
3565
|
end
|
@@ -3505,6 +3743,41 @@ describe Daru::DataFrame do
|
|
3505
3743
|
end
|
3506
3744
|
end
|
3507
3745
|
|
3746
|
+
context '#reset_index' do
|
3747
|
+
context 'when Index' do
|
3748
|
+
subject do
|
3749
|
+
Daru::DataFrame.new(
|
3750
|
+
{'vals' => [1,2,3,4,5]},
|
3751
|
+
index: Daru::Index.new(%w[a b c d e], name: 'indices')
|
3752
|
+
).reset_index
|
3753
|
+
end
|
3754
|
+
|
3755
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3756
|
+
'indices' => %w[a b c d e],
|
3757
|
+
'vals' => [1,2,3,4,5]
|
3758
|
+
)}
|
3759
|
+
end
|
3760
|
+
|
3761
|
+
context 'when MultiIndex' do
|
3762
|
+
subject do
|
3763
|
+
mi = Daru::MultiIndex.from_tuples([
|
3764
|
+
[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
|
3765
|
+
])
|
3766
|
+
mi.name = %w[nums alphas]
|
3767
|
+
Daru::DataFrame.new(
|
3768
|
+
{'vals' => [1,2,3,4]},
|
3769
|
+
index: mi
|
3770
|
+
).reset_index
|
3771
|
+
end
|
3772
|
+
|
3773
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3774
|
+
'nums' => [0,0,1,1],
|
3775
|
+
'alphas' => %w[a b a b],
|
3776
|
+
'vals' => [1,2,3,4]
|
3777
|
+
)}
|
3778
|
+
end
|
3779
|
+
end
|
3780
|
+
|
3508
3781
|
context "#set_index" do
|
3509
3782
|
before(:each) do
|
3510
3783
|
@df = Daru::DataFrame.new({
|
@@ -3544,6 +3817,26 @@ describe Daru::DataFrame do
|
|
3544
3817
|
jholu.set_index(:a)
|
3545
3818
|
}.to raise_error(ArgumentError)
|
3546
3819
|
end
|
3820
|
+
|
3821
|
+
it "sets multiindex if array is given" do
|
3822
|
+
df = Daru::DataFrame.new({
|
3823
|
+
a: %w[a a b b],
|
3824
|
+
b: [1, 2, 1, 2],
|
3825
|
+
c: %w[a b c d]
|
3826
|
+
})
|
3827
|
+
df.set_index(%i[a b])
|
3828
|
+
expected =
|
3829
|
+
Daru::DataFrame.new(
|
3830
|
+
{ c: %w[a b c d] },
|
3831
|
+
index: Daru::MultiIndex.from_tuples(
|
3832
|
+
[['a', 1], ['a', 2], ['b', 1], ['b', 2]]
|
3833
|
+
)
|
3834
|
+
).tap do |df|
|
3835
|
+
df.index.name = %i[a b]
|
3836
|
+
df
|
3837
|
+
end
|
3838
|
+
expect(df).to eq(expected)
|
3839
|
+
end
|
3547
3840
|
end
|
3548
3841
|
|
3549
3842
|
context "#concat" do
|
@@ -3644,7 +3937,7 @@ describe Daru::DataFrame do
|
|
3644
3937
|
expect(df_union.index.to_a).to eq v1 + v2
|
3645
3938
|
end
|
3646
3939
|
end
|
3647
|
-
|
3940
|
+
|
3648
3941
|
context '#inspect' do
|
3649
3942
|
subject { df.inspect }
|
3650
3943
|
|
@@ -3667,6 +3960,41 @@ describe Daru::DataFrame do
|
|
3667
3960
|
}.unindent}
|
3668
3961
|
end
|
3669
3962
|
|
3963
|
+
context 'if index name is set' do
|
3964
|
+
context 'single index with name' do
|
3965
|
+
let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]},
|
3966
|
+
name: 'test')}
|
3967
|
+
before { df.index.name = 'index_name' }
|
3968
|
+
it { should == %Q{
|
3969
|
+
|#<Daru::DataFrame: test (3x3)>
|
3970
|
+
| index_name a b c
|
3971
|
+
| 0 1 3 6
|
3972
|
+
| 1 2 4 7
|
3973
|
+
| 2 3 5 8
|
3974
|
+
}.unindent}
|
3975
|
+
end
|
3976
|
+
|
3977
|
+
context 'MultiIndex with name' do
|
3978
|
+
let(:mi) { Daru::MultiIndex.new(
|
3979
|
+
levels: [[:a,:b,:c], [:one, :two]],
|
3980
|
+
labels: [[0,0,1,1,2,2], [0,1,0,1,0,1]], name: ['s1', 's2']) }
|
3981
|
+
let(:df) { Daru::DataFrame.new({
|
3982
|
+
a: [11, 12, 13, 14, 15, 16], b: [21, 22, 23, 24, 25, 26]},
|
3983
|
+
name: 'test', index: mi)}
|
3984
|
+
it { should == %Q{
|
3985
|
+
|#<Daru::DataFrame: test (6x2)>
|
3986
|
+
| s1 s2 a b
|
3987
|
+
| a one 11 21
|
3988
|
+
| two 12 22
|
3989
|
+
| b one 13 23
|
3990
|
+
| two 14 24
|
3991
|
+
| c one 15 25
|
3992
|
+
| two 16 26
|
3993
|
+
}.unindent}
|
3994
|
+
end
|
3995
|
+
|
3996
|
+
end
|
3997
|
+
|
3670
3998
|
context 'no name' do
|
3671
3999
|
let(:df) { Daru::DataFrame.new({a: [1,2,3], b: [3,4,5], c: [6,7,8]})}
|
3672
4000
|
it { should == %Q{
|
@@ -3709,6 +4037,21 @@ describe Daru::DataFrame do
|
|
3709
4037
|
| 12 1 1 1
|
3710
4038
|
| 13 1 1 1
|
3711
4039
|
| 14 1 1 1
|
4040
|
+
| 15 1 1 1
|
4041
|
+
| 16 1 1 1
|
4042
|
+
| 17 1 1 1
|
4043
|
+
| 18 1 1 1
|
4044
|
+
| 19 1 1 1
|
4045
|
+
| 20 1 1 1
|
4046
|
+
| 21 1 1 1
|
4047
|
+
| 22 1 1 1
|
4048
|
+
| 23 1 1 1
|
4049
|
+
| 24 1 1 1
|
4050
|
+
| 25 1 1 1
|
4051
|
+
| 26 1 1 1
|
4052
|
+
| 27 1 1 1
|
4053
|
+
| 28 1 1 1
|
4054
|
+
| 29 1 1 1
|
3712
4055
|
| ... ... ... ...
|
3713
4056
|
}.unindent}
|
3714
4057
|
end
|
@@ -3764,8 +4107,18 @@ describe Daru::DataFrame do
|
|
3764
4107
|
end
|
3765
4108
|
|
3766
4109
|
context '#to_s' do
|
3767
|
-
it 'produces
|
3768
|
-
expect(@data_frame.to_s).to eq
|
4110
|
+
it 'produces a class, size description' do
|
4111
|
+
expect(@data_frame.to_s).to eq "#<Daru::DataFrame(5x3)>"
|
4112
|
+
end
|
4113
|
+
|
4114
|
+
it 'produces a class, name, size description' do
|
4115
|
+
@data_frame.name = "Test"
|
4116
|
+
expect(@data_frame.to_s).to eq "#<Daru::DataFrame: Test(5x3)>"
|
4117
|
+
end
|
4118
|
+
|
4119
|
+
it 'produces a class, name, size description when the name is a symbol' do
|
4120
|
+
@data_frame.name = :Test
|
4121
|
+
expect(@data_frame.to_s).to eq "#<Daru::DataFrame: Test(5x3)>"
|
3769
4122
|
end
|
3770
4123
|
end
|
3771
4124
|
|
@@ -3824,6 +4177,102 @@ describe Daru::DataFrame do
|
|
3824
4177
|
end
|
3825
4178
|
end
|
3826
4179
|
|
4180
|
+
context '#access_row_tuples_by_indexs' do
|
4181
|
+
let(:df) {
|
4182
|
+
Daru::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
|
4183
|
+
let(:df_idx) {
|
4184
|
+
Daru::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
|
4185
|
+
}
|
4186
|
+
let (:mi_idx) do
|
4187
|
+
Daru::MultiIndex.from_tuples [
|
4188
|
+
[:a,:one,:bar],
|
4189
|
+
[:a,:one,:baz],
|
4190
|
+
[:b,:two,:bar],
|
4191
|
+
[:a,:two,:baz],
|
4192
|
+
]
|
4193
|
+
end
|
4194
|
+
let (:df_mi) do
|
4195
|
+
Daru::DataFrame.new({
|
4196
|
+
a: 1..4,
|
4197
|
+
b: 'a'..'d'
|
4198
|
+
}, index: mi_idx )
|
4199
|
+
end
|
4200
|
+
context 'when no index is given' do
|
4201
|
+
it 'returns empty Array' do
|
4202
|
+
expect(df.access_row_tuples_by_indexs()).to eq([])
|
4203
|
+
end
|
4204
|
+
end
|
4205
|
+
context 'when index(s) are given' do
|
4206
|
+
it 'returns Array of row tuples' do
|
4207
|
+
expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
|
4208
|
+
expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
|
4209
|
+
end
|
4210
|
+
end
|
4211
|
+
context 'when custom index(s) are given' do
|
4212
|
+
it 'returns Array of row tuples' do
|
4213
|
+
expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
|
4214
|
+
[[52, 1], [7, 3]]
|
4215
|
+
)
|
4216
|
+
end
|
4217
|
+
end
|
4218
|
+
context 'when multi index is given' do
|
4219
|
+
it 'returns Array of row tuples' do
|
4220
|
+
expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
|
4221
|
+
[[1, "a"], [2, "b"], [4, "d"]]
|
4222
|
+
)
|
4223
|
+
expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
|
4224
|
+
[[2, "b"]]
|
4225
|
+
)
|
4226
|
+
end
|
4227
|
+
end
|
4228
|
+
end
|
4229
|
+
|
4230
|
+
context '#aggregate' do
|
4231
|
+
let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
|
4232
|
+
let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
|
4233
|
+
let(:df_cat_idx) {
|
4234
|
+
Daru::DataFrame.new({num: [52,12,07,17,01]}, index: cat_idx) }
|
4235
|
+
|
4236
|
+
it 'lambda function on particular column' do
|
4237
|
+
expect(df.aggregate(num_100_times: ->(df) { (df.num*100).first })).to eq(
|
4238
|
+
Daru::DataFrame.new(num_100_times: [5200, 1200, 700, 1700, 100])
|
4239
|
+
)
|
4240
|
+
end
|
4241
|
+
it 'aggregate sum on particular column' do
|
4242
|
+
expect(df_cat_idx.aggregate(num: :sum)).to eq(
|
4243
|
+
Daru::DataFrame.new({num: [76, 12, 1]}, index: [:a, :b, :c])
|
4244
|
+
)
|
4245
|
+
end
|
4246
|
+
end
|
4247
|
+
|
4248
|
+
context '#group_by_and_aggregate' do
|
4249
|
+
let(:spending_df) {
|
4250
|
+
Daru::DataFrame.rows([
|
4251
|
+
[2010, 'dev', 50, 1],
|
4252
|
+
[2010, 'dev', 150, 1],
|
4253
|
+
[2010, 'dev', 200, 1],
|
4254
|
+
[2011, 'dev', 50, 1],
|
4255
|
+
[2012, 'dev', 150, 1],
|
4256
|
+
|
4257
|
+
[2011, 'office', 300, 1],
|
4258
|
+
|
4259
|
+
[2010, 'market', 50, 1],
|
4260
|
+
[2011, 'market', 500, 1],
|
4261
|
+
[2012, 'market', 500, 1],
|
4262
|
+
[2012, 'market', 300, 1],
|
4263
|
+
|
4264
|
+
[2012, 'R&D', 10, 1],],
|
4265
|
+
order: [:year, :category, :spending, :nb_spending])
|
4266
|
+
}
|
4267
|
+
|
4268
|
+
it 'works as group_by + aggregate' do
|
4269
|
+
expect(spending_df.group_by_and_aggregate(:year, {spending: :sum})).to eq(
|
4270
|
+
spending_df.group_by(:year).aggregate(spending: :sum))
|
4271
|
+
expect(spending_df.group_by_and_aggregate([:year, :category], spending: :sum, nb_spending: :size)).to eq(
|
4272
|
+
spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :size))
|
4273
|
+
end
|
4274
|
+
end
|
4275
|
+
|
3827
4276
|
context '#create_sql' do
|
3828
4277
|
let(:df) { Daru::DataFrame.new({
|
3829
4278
|
a: [1,2,3],
|
@@ -3839,4 +4288,12 @@ describe Daru::DataFrame do
|
|
3839
4288
|
| c DATE) CHARACTER SET=UTF8;
|
3840
4289
|
}.unindent}
|
3841
4290
|
end
|
4291
|
+
|
4292
|
+
context "#by_single_key" do
|
4293
|
+
let(:df) { Daru::DataFrame.new(a: [1, 2, 3], b: [4, 5, 6] ) }
|
4294
|
+
|
4295
|
+
it 'raise error when vector is missing from dataframe' do
|
4296
|
+
expect { df[:c] }.to raise_error(IndexError, /Specified vector c does not exist/)
|
4297
|
+
end
|
4298
|
+
end
|
3842
4299
|
end if mri?
|