daru 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +4 -1
- data/CONTRIBUTING.md +9 -0
- data/History.md +16 -1
- data/README.md +22 -3
- data/benchmarks/db_loading.rb +34 -0
- data/daru.gemspec +4 -2
- data/lib/daru/category.rb +13 -4
- data/lib/daru/core/group_by.rb +40 -31
- data/lib/daru/dataframe.rb +200 -54
- data/lib/daru/index/index.rb +12 -11
- data/lib/daru/index/multi_index.rb +8 -3
- data/lib/daru/io/io.rb +5 -17
- data/lib/daru/iruby/templates/dataframe.html.erb +1 -1
- data/lib/daru/vector.rb +20 -6
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +6 -1
- data/spec/dataframe_spec.rb +110 -0
- data/spec/index/index_spec.rb +26 -0
- data/spec/index/multi_index_spec.rb +18 -0
- metadata +10 -10
data/lib/daru/index/index.rb
CHANGED
@@ -74,7 +74,7 @@ module Daru
|
|
74
74
|
def ==(other)
|
75
75
|
return false if self.class != other.class || other.size != @size
|
76
76
|
|
77
|
-
@
|
77
|
+
@keys == other.to_a &&
|
78
78
|
@relation_hash.values == other.relation_hash.values
|
79
79
|
end
|
80
80
|
|
@@ -201,7 +201,7 @@ module Daru
|
|
201
201
|
end
|
202
202
|
|
203
203
|
def to_a
|
204
|
-
@
|
204
|
+
@keys
|
205
205
|
end
|
206
206
|
|
207
207
|
def key(value)
|
@@ -230,16 +230,16 @@ module Daru
|
|
230
230
|
# # 3 false
|
231
231
|
# # 4 true
|
232
232
|
def is_values(*indexes) # rubocop:disable Style/PredicateName
|
233
|
-
bool_array = @
|
233
|
+
bool_array = @keys.map { |r| indexes.include?(r) }
|
234
234
|
Daru::Vector.new(bool_array)
|
235
235
|
end
|
236
236
|
|
237
237
|
def empty?
|
238
|
-
@
|
238
|
+
@size.zero?
|
239
239
|
end
|
240
240
|
|
241
241
|
def dup
|
242
|
-
Daru::Index.new @
|
242
|
+
Daru::Index.new @keys, name: @name
|
243
243
|
end
|
244
244
|
|
245
245
|
def add *indexes
|
@@ -285,15 +285,17 @@ module Daru
|
|
285
285
|
# di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
|
286
286
|
def sort opts={}
|
287
287
|
opts = {ascending: true}.merge(opts)
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
new_index, = @relation_hash.sort.reverse.transpose
|
292
|
-
end
|
288
|
+
|
289
|
+
new_index = @keys.sort
|
290
|
+
new_index = new_index.reverse unless opts[:ascending]
|
293
291
|
|
294
292
|
self.class.new(new_index)
|
295
293
|
end
|
296
294
|
|
295
|
+
def to_df
|
296
|
+
Daru::DataFrame.new(name => to_a)
|
297
|
+
end
|
298
|
+
|
297
299
|
private
|
298
300
|
|
299
301
|
def guess_index index
|
@@ -342,7 +344,6 @@ module Daru
|
|
342
344
|
|
343
345
|
# Raises IndexError when one of the positions is an invalid position
|
344
346
|
def validate_positions *positions
|
345
|
-
positions = [positions] if positions.is_a? Integer
|
346
347
|
positions.each do |pos|
|
347
348
|
raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
|
348
349
|
end
|
@@ -9,6 +9,7 @@ module Daru
|
|
9
9
|
end
|
10
10
|
|
11
11
|
attr_reader :labels
|
12
|
+
attr_reader :name
|
12
13
|
|
13
14
|
def levels
|
14
15
|
@levels.map(&:keys)
|
@@ -19,7 +20,7 @@ module Daru
|
|
19
20
|
# If user don't want to put name for particular level then user must put
|
20
21
|
# empty string in that index of Array `name`.
|
21
22
|
# For example there is multi_index of 3 levels and user don't want to name
|
22
|
-
# level 0, then do
|
23
|
+
# level 0, then do multi_index.name = ['', 'level1_name1', 'level2_name']
|
23
24
|
#
|
24
25
|
# @example
|
25
26
|
#
|
@@ -196,12 +197,12 @@ module Daru
|
|
196
197
|
end
|
197
198
|
|
198
199
|
def add *indexes
|
199
|
-
Daru::MultiIndex.from_tuples
|
200
|
+
Daru::MultiIndex.from_tuples(to_a + [indexes])
|
200
201
|
end
|
201
202
|
|
202
203
|
def reorder(new_order)
|
203
204
|
from = to_a
|
204
|
-
|
205
|
+
MultiIndex.from_tuples(new_order.map { |i| from[i] })
|
205
206
|
end
|
206
207
|
|
207
208
|
def try_retrieve_from_integer int
|
@@ -365,5 +366,9 @@ module Daru
|
|
365
366
|
[nil] * (cur.size - left.size) + left.map(&:first)
|
366
367
|
}
|
367
368
|
end
|
369
|
+
|
370
|
+
def to_df
|
371
|
+
Daru::DataFrame.new(@name.zip(to_a.transpose).to_h)
|
372
|
+
end
|
368
373
|
end
|
369
374
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -34,7 +34,7 @@ module Daru
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
module IO
|
37
|
+
module IO
|
38
38
|
class << self
|
39
39
|
# Functions for loading/writing Excel files.
|
40
40
|
|
@@ -134,23 +134,11 @@ module Daru
|
|
134
134
|
#
|
135
135
|
# @return A dataframe containing the data in the given relation
|
136
136
|
def from_activerecord(relation, *fields)
|
137
|
-
if fields.empty?
|
138
|
-
|
139
|
-
record.attributes.symbolize_keys
|
140
|
-
end
|
141
|
-
return Daru::DataFrame.new(records)
|
142
|
-
else
|
143
|
-
fields = fields.map(&:to_sym)
|
144
|
-
end
|
137
|
+
fields = relation.klass.column_names if fields.empty?
|
138
|
+
fields = fields.map(&:to_sym)
|
145
139
|
|
146
|
-
|
147
|
-
|
148
|
-
Daru::DataFrame.new(vectors, order: fields).tap do |df|
|
149
|
-
relation.pluck(*fields).each do |record|
|
150
|
-
df.add_row(Array(record))
|
151
|
-
end
|
152
|
-
df.update
|
153
|
-
end
|
140
|
+
result = relation.pluck(*fields).transpose
|
141
|
+
Daru::DataFrame.new(result, order: fields).tap(&:update)
|
154
142
|
end
|
155
143
|
|
156
144
|
# Loading data from plain text files
|
data/lib/daru/vector.rb
CHANGED
@@ -151,8 +151,6 @@ module Daru
|
|
151
151
|
attr_accessor :labels
|
152
152
|
# Store vector data in an array
|
153
153
|
attr_reader :data
|
154
|
-
# Ploting library being used for this vector
|
155
|
-
attr_reader :plotting_library
|
156
154
|
# TODO: Make private.
|
157
155
|
attr_reader :nil_positions, :nan_positions
|
158
156
|
|
@@ -197,6 +195,13 @@ module Daru
|
|
197
195
|
end
|
198
196
|
end
|
199
197
|
|
198
|
+
# attr_reader for :plotting_library
|
199
|
+
def plotting_library
|
200
|
+
init_plotting_library
|
201
|
+
|
202
|
+
@plotting_library
|
203
|
+
end
|
204
|
+
|
200
205
|
def plotting_library= lib
|
201
206
|
case lib
|
202
207
|
when :gruff, :nyaplot
|
@@ -207,11 +212,18 @@ module Daru
|
|
207
212
|
)
|
208
213
|
end
|
209
214
|
else
|
210
|
-
raise
|
215
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
211
216
|
'Supported libraries are :nyaplot and :gruff'
|
212
217
|
end
|
213
218
|
end
|
214
219
|
|
220
|
+
# this method is overwritten: see Daru::Vector#plotting_library=
|
221
|
+
def plot(*args, **options, &b)
|
222
|
+
init_plotting_library
|
223
|
+
|
224
|
+
plot(*args, **options, &b)
|
225
|
+
end
|
226
|
+
|
215
227
|
# Get one or more elements with specified index or a range.
|
216
228
|
#
|
217
229
|
# == Usage
|
@@ -1481,6 +1493,11 @@ module Daru
|
|
1481
1493
|
|
1482
1494
|
private
|
1483
1495
|
|
1496
|
+
# Will lazily load the plotting library being used for this vector
|
1497
|
+
def init_plotting_library
|
1498
|
+
self.plotting_library = Daru.plotting_library
|
1499
|
+
end
|
1500
|
+
|
1484
1501
|
def copy(values)
|
1485
1502
|
# Make sure values is right-justified to the size of the vector
|
1486
1503
|
values.concat([nil] * (size-values.size)) if values.size < size
|
@@ -1514,8 +1531,6 @@ module Daru
|
|
1514
1531
|
guard_sizes!
|
1515
1532
|
|
1516
1533
|
@possibly_changed_type = true
|
1517
|
-
# Include plotting functionality
|
1518
|
-
self.plotting_library = Daru.plotting_library
|
1519
1534
|
end
|
1520
1535
|
|
1521
1536
|
def parse_source source, opts
|
@@ -1598,7 +1613,6 @@ module Daru
|
|
1598
1613
|
|
1599
1614
|
# Raises IndexError when one of the positions is an invalid position
|
1600
1615
|
def validate_positions *positions
|
1601
|
-
positions = [positions] if positions.is_a? Integer
|
1602
1616
|
positions.each do |pos|
|
1603
1617
|
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
1604
1618
|
end
|
data/lib/daru/version.rb
CHANGED
data/spec/core/group_by_spec.rb
CHANGED
@@ -620,9 +620,14 @@ describe Daru::Core::GroupBy do
|
|
620
620
|
end
|
621
621
|
|
622
622
|
it 'works as older methods' do
|
623
|
-
newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
|
624
623
|
older_way = spending_df.group_by([:year, :category]).sum
|
624
|
+
|
625
|
+
newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
|
625
626
|
expect(newer_way).to eq(older_way)
|
627
|
+
|
628
|
+
contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
|
629
|
+
contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
|
630
|
+
expect(contrived_way).to eq(older_way)
|
626
631
|
end
|
627
632
|
|
628
633
|
context 'can aggregate on MultiIndex' do
|
data/spec/dataframe_spec.rb
CHANGED
@@ -2784,6 +2784,11 @@ describe Daru::DataFrame do
|
|
2784
2784
|
:a => [1,2,3,4,5]
|
2785
2785
|
}, order: [:b, 'a', :a]))
|
2786
2786
|
end
|
2787
|
+
|
2788
|
+
it 'raises ArgumentError if argument was not an index' do
|
2789
|
+
df = Daru::DataFrame.new([])
|
2790
|
+
expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
|
2791
|
+
end
|
2787
2792
|
end
|
2788
2793
|
|
2789
2794
|
context "#to_matrix" do
|
@@ -3673,6 +3678,41 @@ describe Daru::DataFrame do
|
|
3673
3678
|
end
|
3674
3679
|
end
|
3675
3680
|
|
3681
|
+
context '#reset_index' do
|
3682
|
+
context 'when Index' do
|
3683
|
+
subject do
|
3684
|
+
Daru::DataFrame.new(
|
3685
|
+
{'vals' => [1,2,3,4,5]},
|
3686
|
+
index: Daru::Index.new(%w[a b c d e], name: 'indices')
|
3687
|
+
).reset_index
|
3688
|
+
end
|
3689
|
+
|
3690
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3691
|
+
'indices' => %w[a b c d e],
|
3692
|
+
'vals' => [1,2,3,4,5]
|
3693
|
+
)}
|
3694
|
+
end
|
3695
|
+
|
3696
|
+
context 'when MultiIndex' do
|
3697
|
+
subject do
|
3698
|
+
mi = Daru::MultiIndex.from_tuples([
|
3699
|
+
[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
|
3700
|
+
])
|
3701
|
+
mi.name = %w[nums alphas]
|
3702
|
+
Daru::DataFrame.new(
|
3703
|
+
{'vals' => [1,2,3,4]},
|
3704
|
+
index: mi
|
3705
|
+
).reset_index
|
3706
|
+
end
|
3707
|
+
|
3708
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3709
|
+
'nums' => [0,0,1,1],
|
3710
|
+
'alphas' => %w[a b a b],
|
3711
|
+
'vals' => [1,2,3,4]
|
3712
|
+
)}
|
3713
|
+
end
|
3714
|
+
end
|
3715
|
+
|
3676
3716
|
context "#set_index" do
|
3677
3717
|
before(:each) do
|
3678
3718
|
@df = Daru::DataFrame.new({
|
@@ -3712,6 +3752,26 @@ describe Daru::DataFrame do
|
|
3712
3752
|
jholu.set_index(:a)
|
3713
3753
|
}.to raise_error(ArgumentError)
|
3714
3754
|
end
|
3755
|
+
|
3756
|
+
it "sets multiindex if array is given" do
|
3757
|
+
df = Daru::DataFrame.new({
|
3758
|
+
a: %w[a a b b],
|
3759
|
+
b: [1, 2, 1, 2],
|
3760
|
+
c: %w[a b c d]
|
3761
|
+
})
|
3762
|
+
df.set_index(%i[a b])
|
3763
|
+
expected =
|
3764
|
+
Daru::DataFrame.new(
|
3765
|
+
{ c: %w[a b c d] },
|
3766
|
+
index: Daru::MultiIndex.from_tuples(
|
3767
|
+
[['a', 1], ['a', 2], ['b', 1], ['b', 2]]
|
3768
|
+
)
|
3769
|
+
).tap do |df|
|
3770
|
+
df.index.name = %i[a b]
|
3771
|
+
df
|
3772
|
+
end
|
3773
|
+
expect(df).to eq(expected)
|
3774
|
+
end
|
3715
3775
|
end
|
3716
3776
|
|
3717
3777
|
context "#concat" do
|
@@ -4037,6 +4097,56 @@ describe Daru::DataFrame do
|
|
4037
4097
|
end
|
4038
4098
|
end
|
4039
4099
|
|
4100
|
+
context '#access_row_tuples_by_indexs' do
|
4101
|
+
let(:df) {
|
4102
|
+
Daru::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
|
4103
|
+
let(:df_idx) {
|
4104
|
+
Daru::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
|
4105
|
+
}
|
4106
|
+
let (:mi_idx) do
|
4107
|
+
Daru::MultiIndex.from_tuples [
|
4108
|
+
[:a,:one,:bar],
|
4109
|
+
[:a,:one,:baz],
|
4110
|
+
[:b,:two,:bar],
|
4111
|
+
[:a,:two,:baz],
|
4112
|
+
]
|
4113
|
+
end
|
4114
|
+
let (:df_mi) do
|
4115
|
+
Daru::DataFrame.new({
|
4116
|
+
a: 1..4,
|
4117
|
+
b: 'a'..'d'
|
4118
|
+
}, index: mi_idx )
|
4119
|
+
end
|
4120
|
+
context 'when no index is given' do
|
4121
|
+
it 'returns empty Array' do
|
4122
|
+
expect(df.access_row_tuples_by_indexs()).to eq([])
|
4123
|
+
end
|
4124
|
+
end
|
4125
|
+
context 'when index(s) are given' do
|
4126
|
+
it 'returns Array of row tuples' do
|
4127
|
+
expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
|
4128
|
+
expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
|
4129
|
+
end
|
4130
|
+
end
|
4131
|
+
context 'when custom index(s) are given' do
|
4132
|
+
it 'returns Array of row tuples' do
|
4133
|
+
expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
|
4134
|
+
[[52, 1], [7, 3]]
|
4135
|
+
)
|
4136
|
+
end
|
4137
|
+
end
|
4138
|
+
context 'when multi index is given' do
|
4139
|
+
it 'returns Array of row tuples' do
|
4140
|
+
expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
|
4141
|
+
[[1, "a"], [2, "b"], [4, "d"]]
|
4142
|
+
)
|
4143
|
+
expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
|
4144
|
+
[[2, "b"]]
|
4145
|
+
)
|
4146
|
+
end
|
4147
|
+
end
|
4148
|
+
end
|
4149
|
+
|
4040
4150
|
context '#aggregate' do
|
4041
4151
|
let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
|
4042
4152
|
let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
|
data/spec/index/index_spec.rb
CHANGED
@@ -388,4 +388,30 @@ describe Daru::Index do
|
|
388
388
|
end
|
389
389
|
|
390
390
|
end
|
391
|
+
|
392
|
+
context '#to_df' do
|
393
|
+
let(:idx) do
|
394
|
+
Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
|
395
|
+
name: 'instruments')
|
396
|
+
end
|
397
|
+
subject { idx.to_df }
|
398
|
+
|
399
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
400
|
+
'instruments' => ['speaker', 'mic', 'guitar', 'amp']
|
401
|
+
)
|
402
|
+
}
|
403
|
+
end
|
404
|
+
|
405
|
+
context "#dup" do
|
406
|
+
let(:idx) do
|
407
|
+
Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
|
408
|
+
name: 'instruments')
|
409
|
+
end
|
410
|
+
subject { idx.dup }
|
411
|
+
|
412
|
+
it { is_expected.to eq idx }
|
413
|
+
it 'have same names' do
|
414
|
+
expect(subject.name).to eq idx.name
|
415
|
+
end
|
416
|
+
end
|
391
417
|
end
|
@@ -659,4 +659,22 @@ describe Daru::MultiIndex do
|
|
659
659
|
it { expect(idx.valid? :a, :three).to eq false }
|
660
660
|
end
|
661
661
|
end
|
662
|
+
|
663
|
+
context '#to_df' do
|
664
|
+
let(:idx) do
|
665
|
+
described_class.from_tuples([
|
666
|
+
%w[a one bar],
|
667
|
+
%w[a two bar],
|
668
|
+
%w[b two baz],
|
669
|
+
%w[b one foo]
|
670
|
+
]).tap { |idx| idx.name = %w[col1 col2 col3] }
|
671
|
+
end
|
672
|
+
|
673
|
+
subject { idx.to_df }
|
674
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
675
|
+
'col1' => %w[a a b b],
|
676
|
+
'col2' => %w[one two two one],
|
677
|
+
'col3' => %w[bar bar baz foo]
|
678
|
+
)}
|
679
|
+
end
|
662
680
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: daru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: backports
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '1.10'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.10'
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -280,16 +280,16 @@ dependencies:
|
|
280
280
|
name: sqlite3
|
281
281
|
requirement: !ruby/object:Gem::Requirement
|
282
282
|
requirements:
|
283
|
-
- - "
|
283
|
+
- - "~>"
|
284
284
|
- !ruby/object:Gem::Version
|
285
|
-
version:
|
285
|
+
version: 1.3.13
|
286
286
|
type: :development
|
287
287
|
prerelease: false
|
288
288
|
version_requirements: !ruby/object:Gem::Requirement
|
289
289
|
requirements:
|
290
|
-
- - "
|
290
|
+
- - "~>"
|
291
291
|
- !ruby/object:Gem::Version
|
292
|
-
version:
|
292
|
+
version: 1.3.13
|
293
293
|
- !ruby/object:Gem::Dependency
|
294
294
|
name: rubocop
|
295
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -418,6 +418,7 @@ files:
|
|
418
418
|
- benchmarks/TradeoffData.csv
|
419
419
|
- benchmarks/csv_reading.rb
|
420
420
|
- benchmarks/dataframe_creation.rb
|
421
|
+
- benchmarks/db_loading.rb
|
421
422
|
- benchmarks/duplicating.rb
|
422
423
|
- benchmarks/group_by.rb
|
423
424
|
- benchmarks/joining.rb
|
@@ -585,8 +586,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
585
586
|
- !ruby/object:Gem::Version
|
586
587
|
version: '0'
|
587
588
|
requirements: []
|
588
|
-
|
589
|
-
rubygems_version: 2.6.14
|
589
|
+
rubygems_version: 3.0.2
|
590
590
|
signing_key:
|
591
591
|
specification_version: 4
|
592
592
|
summary: Data Analysis in RUby
|