daru 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.travis.yml +4 -1
- data/CONTRIBUTING.md +9 -0
- data/History.md +16 -1
- data/README.md +22 -3
- data/benchmarks/db_loading.rb +34 -0
- data/daru.gemspec +4 -2
- data/lib/daru/category.rb +13 -4
- data/lib/daru/core/group_by.rb +40 -31
- data/lib/daru/dataframe.rb +200 -54
- data/lib/daru/index/index.rb +12 -11
- data/lib/daru/index/multi_index.rb +8 -3
- data/lib/daru/io/io.rb +5 -17
- data/lib/daru/iruby/templates/dataframe.html.erb +1 -1
- data/lib/daru/vector.rb +20 -6
- data/lib/daru/version.rb +1 -1
- data/spec/core/group_by_spec.rb +6 -1
- data/spec/dataframe_spec.rb +110 -0
- data/spec/index/index_spec.rb +26 -0
- data/spec/index/multi_index_spec.rb +18 -0
- metadata +10 -10
data/lib/daru/index/index.rb
CHANGED
@@ -74,7 +74,7 @@ module Daru
|
|
74
74
|
def ==(other)
|
75
75
|
return false if self.class != other.class || other.size != @size
|
76
76
|
|
77
|
-
@
|
77
|
+
@keys == other.to_a &&
|
78
78
|
@relation_hash.values == other.relation_hash.values
|
79
79
|
end
|
80
80
|
|
@@ -201,7 +201,7 @@ module Daru
|
|
201
201
|
end
|
202
202
|
|
203
203
|
def to_a
|
204
|
-
@
|
204
|
+
@keys
|
205
205
|
end
|
206
206
|
|
207
207
|
def key(value)
|
@@ -230,16 +230,16 @@ module Daru
|
|
230
230
|
# # 3 false
|
231
231
|
# # 4 true
|
232
232
|
def is_values(*indexes) # rubocop:disable Style/PredicateName
|
233
|
-
bool_array = @
|
233
|
+
bool_array = @keys.map { |r| indexes.include?(r) }
|
234
234
|
Daru::Vector.new(bool_array)
|
235
235
|
end
|
236
236
|
|
237
237
|
def empty?
|
238
|
-
@
|
238
|
+
@size.zero?
|
239
239
|
end
|
240
240
|
|
241
241
|
def dup
|
242
|
-
Daru::Index.new @
|
242
|
+
Daru::Index.new @keys, name: @name
|
243
243
|
end
|
244
244
|
|
245
245
|
def add *indexes
|
@@ -285,15 +285,17 @@ module Daru
|
|
285
285
|
# di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
|
286
286
|
def sort opts={}
|
287
287
|
opts = {ascending: true}.merge(opts)
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
new_index, = @relation_hash.sort.reverse.transpose
|
292
|
-
end
|
288
|
+
|
289
|
+
new_index = @keys.sort
|
290
|
+
new_index = new_index.reverse unless opts[:ascending]
|
293
291
|
|
294
292
|
self.class.new(new_index)
|
295
293
|
end
|
296
294
|
|
295
|
+
def to_df
|
296
|
+
Daru::DataFrame.new(name => to_a)
|
297
|
+
end
|
298
|
+
|
297
299
|
private
|
298
300
|
|
299
301
|
def guess_index index
|
@@ -342,7 +344,6 @@ module Daru
|
|
342
344
|
|
343
345
|
# Raises IndexError when one of the positions is an invalid position
|
344
346
|
def validate_positions *positions
|
345
|
-
positions = [positions] if positions.is_a? Integer
|
346
347
|
positions.each do |pos|
|
347
348
|
raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
|
348
349
|
end
|
@@ -9,6 +9,7 @@ module Daru
|
|
9
9
|
end
|
10
10
|
|
11
11
|
attr_reader :labels
|
12
|
+
attr_reader :name
|
12
13
|
|
13
14
|
def levels
|
14
15
|
@levels.map(&:keys)
|
@@ -19,7 +20,7 @@ module Daru
|
|
19
20
|
# If user don't want to put name for particular level then user must put
|
20
21
|
# empty string in that index of Array `name`.
|
21
22
|
# For example there is multi_index of 3 levels and user don't want to name
|
22
|
-
# level 0, then do
|
23
|
+
# level 0, then do multi_index.name = ['', 'level1_name1', 'level2_name']
|
23
24
|
#
|
24
25
|
# @example
|
25
26
|
#
|
@@ -196,12 +197,12 @@ module Daru
|
|
196
197
|
end
|
197
198
|
|
198
199
|
def add *indexes
|
199
|
-
Daru::MultiIndex.from_tuples
|
200
|
+
Daru::MultiIndex.from_tuples(to_a + [indexes])
|
200
201
|
end
|
201
202
|
|
202
203
|
def reorder(new_order)
|
203
204
|
from = to_a
|
204
|
-
|
205
|
+
MultiIndex.from_tuples(new_order.map { |i| from[i] })
|
205
206
|
end
|
206
207
|
|
207
208
|
def try_retrieve_from_integer int
|
@@ -365,5 +366,9 @@ module Daru
|
|
365
366
|
[nil] * (cur.size - left.size) + left.map(&:first)
|
366
367
|
}
|
367
368
|
end
|
369
|
+
|
370
|
+
def to_df
|
371
|
+
Daru::DataFrame.new(@name.zip(to_a.transpose).to_h)
|
372
|
+
end
|
368
373
|
end
|
369
374
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -34,7 +34,7 @@ module Daru
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
module IO
|
37
|
+
module IO
|
38
38
|
class << self
|
39
39
|
# Functions for loading/writing Excel files.
|
40
40
|
|
@@ -134,23 +134,11 @@ module Daru
|
|
134
134
|
#
|
135
135
|
# @return A dataframe containing the data in the given relation
|
136
136
|
def from_activerecord(relation, *fields)
|
137
|
-
if fields.empty?
|
138
|
-
|
139
|
-
record.attributes.symbolize_keys
|
140
|
-
end
|
141
|
-
return Daru::DataFrame.new(records)
|
142
|
-
else
|
143
|
-
fields = fields.map(&:to_sym)
|
144
|
-
end
|
137
|
+
fields = relation.klass.column_names if fields.empty?
|
138
|
+
fields = fields.map(&:to_sym)
|
145
139
|
|
146
|
-
|
147
|
-
|
148
|
-
Daru::DataFrame.new(vectors, order: fields).tap do |df|
|
149
|
-
relation.pluck(*fields).each do |record|
|
150
|
-
df.add_row(Array(record))
|
151
|
-
end
|
152
|
-
df.update
|
153
|
-
end
|
140
|
+
result = relation.pluck(*fields).transpose
|
141
|
+
Daru::DataFrame.new(result, order: fields).tap(&:update)
|
154
142
|
end
|
155
143
|
|
156
144
|
# Loading data from plain text files
|
data/lib/daru/vector.rb
CHANGED
@@ -151,8 +151,6 @@ module Daru
|
|
151
151
|
attr_accessor :labels
|
152
152
|
# Store vector data in an array
|
153
153
|
attr_reader :data
|
154
|
-
# Ploting library being used for this vector
|
155
|
-
attr_reader :plotting_library
|
156
154
|
# TODO: Make private.
|
157
155
|
attr_reader :nil_positions, :nan_positions
|
158
156
|
|
@@ -197,6 +195,13 @@ module Daru
|
|
197
195
|
end
|
198
196
|
end
|
199
197
|
|
198
|
+
# attr_reader for :plotting_library
|
199
|
+
def plotting_library
|
200
|
+
init_plotting_library
|
201
|
+
|
202
|
+
@plotting_library
|
203
|
+
end
|
204
|
+
|
200
205
|
def plotting_library= lib
|
201
206
|
case lib
|
202
207
|
when :gruff, :nyaplot
|
@@ -207,11 +212,18 @@ module Daru
|
|
207
212
|
)
|
208
213
|
end
|
209
214
|
else
|
210
|
-
raise
|
215
|
+
raise ArgumentError, "Plotting library #{lib} not supported. "\
|
211
216
|
'Supported libraries are :nyaplot and :gruff'
|
212
217
|
end
|
213
218
|
end
|
214
219
|
|
220
|
+
# this method is overwritten: see Daru::Vector#plotting_library=
|
221
|
+
def plot(*args, **options, &b)
|
222
|
+
init_plotting_library
|
223
|
+
|
224
|
+
plot(*args, **options, &b)
|
225
|
+
end
|
226
|
+
|
215
227
|
# Get one or more elements with specified index or a range.
|
216
228
|
#
|
217
229
|
# == Usage
|
@@ -1481,6 +1493,11 @@ module Daru
|
|
1481
1493
|
|
1482
1494
|
private
|
1483
1495
|
|
1496
|
+
# Will lazily load the plotting library being used for this vector
|
1497
|
+
def init_plotting_library
|
1498
|
+
self.plotting_library = Daru.plotting_library
|
1499
|
+
end
|
1500
|
+
|
1484
1501
|
def copy(values)
|
1485
1502
|
# Make sure values is right-justified to the size of the vector
|
1486
1503
|
values.concat([nil] * (size-values.size)) if values.size < size
|
@@ -1514,8 +1531,6 @@ module Daru
|
|
1514
1531
|
guard_sizes!
|
1515
1532
|
|
1516
1533
|
@possibly_changed_type = true
|
1517
|
-
# Include plotting functionality
|
1518
|
-
self.plotting_library = Daru.plotting_library
|
1519
1534
|
end
|
1520
1535
|
|
1521
1536
|
def parse_source source, opts
|
@@ -1598,7 +1613,6 @@ module Daru
|
|
1598
1613
|
|
1599
1614
|
# Raises IndexError when one of the positions is an invalid position
|
1600
1615
|
def validate_positions *positions
|
1601
|
-
positions = [positions] if positions.is_a? Integer
|
1602
1616
|
positions.each do |pos|
|
1603
1617
|
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
1604
1618
|
end
|
data/lib/daru/version.rb
CHANGED
data/spec/core/group_by_spec.rb
CHANGED
@@ -620,9 +620,14 @@ describe Daru::Core::GroupBy do
|
|
620
620
|
end
|
621
621
|
|
622
622
|
it 'works as older methods' do
|
623
|
-
newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
|
624
623
|
older_way = spending_df.group_by([:year, :category]).sum
|
624
|
+
|
625
|
+
newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
|
625
626
|
expect(newer_way).to eq(older_way)
|
627
|
+
|
628
|
+
contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
|
629
|
+
contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
|
630
|
+
expect(contrived_way).to eq(older_way)
|
626
631
|
end
|
627
632
|
|
628
633
|
context 'can aggregate on MultiIndex' do
|
data/spec/dataframe_spec.rb
CHANGED
@@ -2784,6 +2784,11 @@ describe Daru::DataFrame do
|
|
2784
2784
|
:a => [1,2,3,4,5]
|
2785
2785
|
}, order: [:b, 'a', :a]))
|
2786
2786
|
end
|
2787
|
+
|
2788
|
+
it 'raises ArgumentError if argument was not an index' do
|
2789
|
+
df = Daru::DataFrame.new([])
|
2790
|
+
expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
|
2791
|
+
end
|
2787
2792
|
end
|
2788
2793
|
|
2789
2794
|
context "#to_matrix" do
|
@@ -3673,6 +3678,41 @@ describe Daru::DataFrame do
|
|
3673
3678
|
end
|
3674
3679
|
end
|
3675
3680
|
|
3681
|
+
context '#reset_index' do
|
3682
|
+
context 'when Index' do
|
3683
|
+
subject do
|
3684
|
+
Daru::DataFrame.new(
|
3685
|
+
{'vals' => [1,2,3,4,5]},
|
3686
|
+
index: Daru::Index.new(%w[a b c d e], name: 'indices')
|
3687
|
+
).reset_index
|
3688
|
+
end
|
3689
|
+
|
3690
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3691
|
+
'indices' => %w[a b c d e],
|
3692
|
+
'vals' => [1,2,3,4,5]
|
3693
|
+
)}
|
3694
|
+
end
|
3695
|
+
|
3696
|
+
context 'when MultiIndex' do
|
3697
|
+
subject do
|
3698
|
+
mi = Daru::MultiIndex.from_tuples([
|
3699
|
+
[0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
|
3700
|
+
])
|
3701
|
+
mi.name = %w[nums alphas]
|
3702
|
+
Daru::DataFrame.new(
|
3703
|
+
{'vals' => [1,2,3,4]},
|
3704
|
+
index: mi
|
3705
|
+
).reset_index
|
3706
|
+
end
|
3707
|
+
|
3708
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
3709
|
+
'nums' => [0,0,1,1],
|
3710
|
+
'alphas' => %w[a b a b],
|
3711
|
+
'vals' => [1,2,3,4]
|
3712
|
+
)}
|
3713
|
+
end
|
3714
|
+
end
|
3715
|
+
|
3676
3716
|
context "#set_index" do
|
3677
3717
|
before(:each) do
|
3678
3718
|
@df = Daru::DataFrame.new({
|
@@ -3712,6 +3752,26 @@ describe Daru::DataFrame do
|
|
3712
3752
|
jholu.set_index(:a)
|
3713
3753
|
}.to raise_error(ArgumentError)
|
3714
3754
|
end
|
3755
|
+
|
3756
|
+
it "sets multiindex if array is given" do
|
3757
|
+
df = Daru::DataFrame.new({
|
3758
|
+
a: %w[a a b b],
|
3759
|
+
b: [1, 2, 1, 2],
|
3760
|
+
c: %w[a b c d]
|
3761
|
+
})
|
3762
|
+
df.set_index(%i[a b])
|
3763
|
+
expected =
|
3764
|
+
Daru::DataFrame.new(
|
3765
|
+
{ c: %w[a b c d] },
|
3766
|
+
index: Daru::MultiIndex.from_tuples(
|
3767
|
+
[['a', 1], ['a', 2], ['b', 1], ['b', 2]]
|
3768
|
+
)
|
3769
|
+
).tap do |df|
|
3770
|
+
df.index.name = %i[a b]
|
3771
|
+
df
|
3772
|
+
end
|
3773
|
+
expect(df).to eq(expected)
|
3774
|
+
end
|
3715
3775
|
end
|
3716
3776
|
|
3717
3777
|
context "#concat" do
|
@@ -4037,6 +4097,56 @@ describe Daru::DataFrame do
|
|
4037
4097
|
end
|
4038
4098
|
end
|
4039
4099
|
|
4100
|
+
context '#access_row_tuples_by_indexs' do
|
4101
|
+
let(:df) {
|
4102
|
+
Daru::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
|
4103
|
+
let(:df_idx) {
|
4104
|
+
Daru::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
|
4105
|
+
}
|
4106
|
+
let (:mi_idx) do
|
4107
|
+
Daru::MultiIndex.from_tuples [
|
4108
|
+
[:a,:one,:bar],
|
4109
|
+
[:a,:one,:baz],
|
4110
|
+
[:b,:two,:bar],
|
4111
|
+
[:a,:two,:baz],
|
4112
|
+
]
|
4113
|
+
end
|
4114
|
+
let (:df_mi) do
|
4115
|
+
Daru::DataFrame.new({
|
4116
|
+
a: 1..4,
|
4117
|
+
b: 'a'..'d'
|
4118
|
+
}, index: mi_idx )
|
4119
|
+
end
|
4120
|
+
context 'when no index is given' do
|
4121
|
+
it 'returns empty Array' do
|
4122
|
+
expect(df.access_row_tuples_by_indexs()).to eq([])
|
4123
|
+
end
|
4124
|
+
end
|
4125
|
+
context 'when index(s) are given' do
|
4126
|
+
it 'returns Array of row tuples' do
|
4127
|
+
expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
|
4128
|
+
expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
|
4129
|
+
end
|
4130
|
+
end
|
4131
|
+
context 'when custom index(s) are given' do
|
4132
|
+
it 'returns Array of row tuples' do
|
4133
|
+
expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
|
4134
|
+
[[52, 1], [7, 3]]
|
4135
|
+
)
|
4136
|
+
end
|
4137
|
+
end
|
4138
|
+
context 'when multi index is given' do
|
4139
|
+
it 'returns Array of row tuples' do
|
4140
|
+
expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
|
4141
|
+
[[1, "a"], [2, "b"], [4, "d"]]
|
4142
|
+
)
|
4143
|
+
expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
|
4144
|
+
[[2, "b"]]
|
4145
|
+
)
|
4146
|
+
end
|
4147
|
+
end
|
4148
|
+
end
|
4149
|
+
|
4040
4150
|
context '#aggregate' do
|
4041
4151
|
let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
|
4042
4152
|
let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
|
data/spec/index/index_spec.rb
CHANGED
@@ -388,4 +388,30 @@ describe Daru::Index do
|
|
388
388
|
end
|
389
389
|
|
390
390
|
end
|
391
|
+
|
392
|
+
context '#to_df' do
|
393
|
+
let(:idx) do
|
394
|
+
Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
|
395
|
+
name: 'instruments')
|
396
|
+
end
|
397
|
+
subject { idx.to_df }
|
398
|
+
|
399
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
400
|
+
'instruments' => ['speaker', 'mic', 'guitar', 'amp']
|
401
|
+
)
|
402
|
+
}
|
403
|
+
end
|
404
|
+
|
405
|
+
context "#dup" do
|
406
|
+
let(:idx) do
|
407
|
+
Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
|
408
|
+
name: 'instruments')
|
409
|
+
end
|
410
|
+
subject { idx.dup }
|
411
|
+
|
412
|
+
it { is_expected.to eq idx }
|
413
|
+
it 'have same names' do
|
414
|
+
expect(subject.name).to eq idx.name
|
415
|
+
end
|
416
|
+
end
|
391
417
|
end
|
@@ -659,4 +659,22 @@ describe Daru::MultiIndex do
|
|
659
659
|
it { expect(idx.valid? :a, :three).to eq false }
|
660
660
|
end
|
661
661
|
end
|
662
|
+
|
663
|
+
context '#to_df' do
|
664
|
+
let(:idx) do
|
665
|
+
described_class.from_tuples([
|
666
|
+
%w[a one bar],
|
667
|
+
%w[a two bar],
|
668
|
+
%w[b two baz],
|
669
|
+
%w[b one foo]
|
670
|
+
]).tap { |idx| idx.name = %w[col1 col2 col3] }
|
671
|
+
end
|
672
|
+
|
673
|
+
subject { idx.to_df }
|
674
|
+
it { is_expected.to eq Daru::DataFrame.new(
|
675
|
+
'col1' => %w[a a b b],
|
676
|
+
'col2' => %w[one two two one],
|
677
|
+
'col3' => %w[bar bar baz foo]
|
678
|
+
)}
|
679
|
+
end
|
662
680
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: daru
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sameer Deshmukh
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-08-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: backports
|
@@ -56,14 +56,14 @@ dependencies:
|
|
56
56
|
name: bundler
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- - "
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '1.10'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- - "
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '1.10'
|
69
69
|
- !ruby/object:Gem::Dependency
|
@@ -280,16 +280,16 @@ dependencies:
|
|
280
280
|
name: sqlite3
|
281
281
|
requirement: !ruby/object:Gem::Requirement
|
282
282
|
requirements:
|
283
|
-
- - "
|
283
|
+
- - "~>"
|
284
284
|
- !ruby/object:Gem::Version
|
285
|
-
version:
|
285
|
+
version: 1.3.13
|
286
286
|
type: :development
|
287
287
|
prerelease: false
|
288
288
|
version_requirements: !ruby/object:Gem::Requirement
|
289
289
|
requirements:
|
290
|
-
- - "
|
290
|
+
- - "~>"
|
291
291
|
- !ruby/object:Gem::Version
|
292
|
-
version:
|
292
|
+
version: 1.3.13
|
293
293
|
- !ruby/object:Gem::Dependency
|
294
294
|
name: rubocop
|
295
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -418,6 +418,7 @@ files:
|
|
418
418
|
- benchmarks/TradeoffData.csv
|
419
419
|
- benchmarks/csv_reading.rb
|
420
420
|
- benchmarks/dataframe_creation.rb
|
421
|
+
- benchmarks/db_loading.rb
|
421
422
|
- benchmarks/duplicating.rb
|
422
423
|
- benchmarks/group_by.rb
|
423
424
|
- benchmarks/joining.rb
|
@@ -585,8 +586,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
585
586
|
- !ruby/object:Gem::Version
|
586
587
|
version: '0'
|
587
588
|
requirements: []
|
588
|
-
|
589
|
-
rubygems_version: 2.6.14
|
589
|
+
rubygems_version: 3.0.2
|
590
590
|
signing_key:
|
591
591
|
specification_version: 4
|
592
592
|
summary: Data Analysis in RUby
|