daru 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -74,7 +74,7 @@ module Daru
74
74
  def ==(other)
75
75
  return false if self.class != other.class || other.size != @size
76
76
 
77
- @relation_hash.keys == other.to_a &&
77
+ @keys == other.to_a &&
78
78
  @relation_hash.values == other.relation_hash.values
79
79
  end
80
80
 
@@ -201,7 +201,7 @@ module Daru
201
201
  end
202
202
 
203
203
  def to_a
204
- @relation_hash.keys
204
+ @keys
205
205
  end
206
206
 
207
207
  def key(value)
@@ -230,16 +230,16 @@ module Daru
230
230
  # # 3 false
231
231
  # # 4 true
232
232
  def is_values(*indexes) # rubocop:disable Style/PredicateName
233
- bool_array = @relation_hash.keys.map { |r| indexes.include?(r) }
233
+ bool_array = @keys.map { |r| indexes.include?(r) }
234
234
  Daru::Vector.new(bool_array)
235
235
  end
236
236
 
237
237
  def empty?
238
- @relation_hash.empty?
238
+ @size.zero?
239
239
  end
240
240
 
241
241
  def dup
242
- Daru::Index.new @relation_hash.keys
242
+ Daru::Index.new @keys, name: @name
243
243
  end
244
244
 
245
245
  def add *indexes
@@ -285,15 +285,17 @@ module Daru
285
285
  # di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
286
286
  def sort opts={}
287
287
  opts = {ascending: true}.merge(opts)
288
- if opts[:ascending]
289
- new_index, = @relation_hash.sort.transpose
290
- else
291
- new_index, = @relation_hash.sort.reverse.transpose
292
- end
288
+
289
+ new_index = @keys.sort
290
+ new_index = new_index.reverse unless opts[:ascending]
293
291
 
294
292
  self.class.new(new_index)
295
293
  end
296
294
 
295
+ def to_df
296
+ Daru::DataFrame.new(name => to_a)
297
+ end
298
+
297
299
  private
298
300
 
299
301
  def guess_index index
@@ -342,7 +344,6 @@ module Daru
342
344
 
343
345
  # Raises IndexError when one of the positions is an invalid position
344
346
  def validate_positions *positions
345
- positions = [positions] if positions.is_a? Integer
346
347
  positions.each do |pos|
347
348
  raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
348
349
  end
@@ -9,6 +9,7 @@ module Daru
9
9
  end
10
10
 
11
11
  attr_reader :labels
12
+ attr_reader :name
12
13
 
13
14
  def levels
14
15
  @levels.map(&:keys)
@@ -19,7 +20,7 @@ module Daru
19
20
  # If user don't want to put name for particular level then user must put
20
21
  # empty string in that index of Array `name`.
21
22
  # For example there is multi_index of 3 levels and user don't want to name
22
- # level 0, then do mulit_index.name = ['', 'level1_name1', 'level2_name']
23
+ # level 0, then do multi_index.name = ['', 'level1_name1', 'level2_name']
23
24
  #
24
25
  # @example
25
26
  #
@@ -196,12 +197,12 @@ module Daru
196
197
  end
197
198
 
198
199
  def add *indexes
199
- Daru::MultiIndex.from_tuples to_a << indexes
200
+ Daru::MultiIndex.from_tuples(to_a + [indexes])
200
201
  end
201
202
 
202
203
  def reorder(new_order)
203
204
  from = to_a
204
- self.class.from_tuples(new_order.map { |i| from[i] })
205
+ MultiIndex.from_tuples(new_order.map { |i| from[i] })
205
206
  end
206
207
 
207
208
  def try_retrieve_from_integer int
@@ -365,5 +366,9 @@ module Daru
365
366
  [nil] * (cur.size - left.size) + left.map(&:first)
366
367
  }
367
368
  end
369
+
370
+ def to_df
371
+ Daru::DataFrame.new(@name.zip(to_a.transpose).to_h)
372
+ end
368
373
  end
369
374
  end
@@ -34,7 +34,7 @@ module Daru
34
34
  end
35
35
  end
36
36
 
37
- module IO # rubocop:disable Metrics/ModuleLength
37
+ module IO
38
38
  class << self
39
39
  # Functions for loading/writing Excel files.
40
40
 
@@ -134,23 +134,11 @@ module Daru
134
134
  #
135
135
  # @return A dataframe containing the data in the given relation
136
136
  def from_activerecord(relation, *fields)
137
- if fields.empty?
138
- records = relation.map do |record|
139
- record.attributes.symbolize_keys
140
- end
141
- return Daru::DataFrame.new(records)
142
- else
143
- fields = fields.map(&:to_sym)
144
- end
137
+ fields = relation.klass.column_names if fields.empty?
138
+ fields = fields.map(&:to_sym)
145
139
 
146
- vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h
147
-
148
- Daru::DataFrame.new(vectors, order: fields).tap do |df|
149
- relation.pluck(*fields).each do |record|
150
- df.add_row(Array(record))
151
- end
152
- df.update
153
- end
140
+ result = relation.pluck(*fields).transpose
141
+ Daru::DataFrame.new(result, order: fields).tap(&:update)
154
142
  end
155
143
 
156
144
  # Loading data from plain text files
@@ -1,5 +1,5 @@
1
1
  <b> Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>) </b>
2
- <table>
2
+ <table border="1" class="dataframe">
3
3
  <%= table_thead %>
4
4
  <%= table_tbody %>
5
5
  </table>
@@ -151,8 +151,6 @@ module Daru
151
151
  attr_accessor :labels
152
152
  # Store vector data in an array
153
153
  attr_reader :data
154
- # Ploting library being used for this vector
155
- attr_reader :plotting_library
156
154
  # TODO: Make private.
157
155
  attr_reader :nil_positions, :nan_positions
158
156
 
@@ -197,6 +195,13 @@ module Daru
197
195
  end
198
196
  end
199
197
 
198
+ # attr_reader for :plotting_library
199
+ def plotting_library
200
+ init_plotting_library
201
+
202
+ @plotting_library
203
+ end
204
+
200
205
  def plotting_library= lib
201
206
  case lib
202
207
  when :gruff, :nyaplot
@@ -207,11 +212,18 @@ module Daru
207
212
  )
208
213
  end
209
214
  else
210
- raise ArguementError, "Plotting library #{lib} not supported. "\
215
+ raise ArgumentError, "Plotting library #{lib} not supported. "\
211
216
  'Supported libraries are :nyaplot and :gruff'
212
217
  end
213
218
  end
214
219
 
220
+ # this method is overwritten: see Daru::Vector#plotting_library=
221
+ def plot(*args, **options, &b)
222
+ init_plotting_library
223
+
224
+ plot(*args, **options, &b)
225
+ end
226
+
215
227
  # Get one or more elements with specified index or a range.
216
228
  #
217
229
  # == Usage
@@ -1481,6 +1493,11 @@ module Daru
1481
1493
 
1482
1494
  private
1483
1495
 
1496
+ # Will lazily load the plotting library being used for this vector
1497
+ def init_plotting_library
1498
+ self.plotting_library = Daru.plotting_library
1499
+ end
1500
+
1484
1501
  def copy(values)
1485
1502
  # Make sure values is right-justified to the size of the vector
1486
1503
  values.concat([nil] * (size-values.size)) if values.size < size
@@ -1514,8 +1531,6 @@ module Daru
1514
1531
  guard_sizes!
1515
1532
 
1516
1533
  @possibly_changed_type = true
1517
- # Include plotting functionality
1518
- self.plotting_library = Daru.plotting_library
1519
1534
  end
1520
1535
 
1521
1536
  def parse_source source, opts
@@ -1598,7 +1613,6 @@ module Daru
1598
1613
 
1599
1614
  # Raises IndexError when one of the positions is an invalid position
1600
1615
  def validate_positions *positions
1601
- positions = [positions] if positions.is_a? Integer
1602
1616
  positions.each do |pos|
1603
1617
  raise IndexError, "#{pos} is not a valid position." if pos >= size
1604
1618
  end
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.2.2'.freeze
3
3
  end
@@ -620,9 +620,14 @@ describe Daru::Core::GroupBy do
620
620
  end
621
621
 
622
622
  it 'works as older methods' do
623
- newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
624
623
  older_way = spending_df.group_by([:year, :category]).sum
624
+
625
+ newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
625
626
  expect(newer_way).to eq(older_way)
627
+
628
+ contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
629
+ contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
630
+ expect(contrived_way).to eq(older_way)
626
631
  end
627
632
 
628
633
  context 'can aggregate on MultiIndex' do
@@ -2784,6 +2784,11 @@ describe Daru::DataFrame do
2784
2784
  :a => [1,2,3,4,5]
2785
2785
  }, order: [:b, 'a', :a]))
2786
2786
  end
2787
+
2788
+ it 'raises ArgumentError if argument was not an index' do
2789
+ df = Daru::DataFrame.new([])
2790
+ expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2791
+ end
2787
2792
  end
2788
2793
 
2789
2794
  context "#to_matrix" do
@@ -3673,6 +3678,41 @@ describe Daru::DataFrame do
3673
3678
  end
3674
3679
  end
3675
3680
 
3681
+ context '#reset_index' do
3682
+ context 'when Index' do
3683
+ subject do
3684
+ Daru::DataFrame.new(
3685
+ {'vals' => [1,2,3,4,5]},
3686
+ index: Daru::Index.new(%w[a b c d e], name: 'indices')
3687
+ ).reset_index
3688
+ end
3689
+
3690
+ it { is_expected.to eq Daru::DataFrame.new(
3691
+ 'indices' => %w[a b c d e],
3692
+ 'vals' => [1,2,3,4,5]
3693
+ )}
3694
+ end
3695
+
3696
+ context 'when MultiIndex' do
3697
+ subject do
3698
+ mi = Daru::MultiIndex.from_tuples([
3699
+ [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
3700
+ ])
3701
+ mi.name = %w[nums alphas]
3702
+ Daru::DataFrame.new(
3703
+ {'vals' => [1,2,3,4]},
3704
+ index: mi
3705
+ ).reset_index
3706
+ end
3707
+
3708
+ it { is_expected.to eq Daru::DataFrame.new(
3709
+ 'nums' => [0,0,1,1],
3710
+ 'alphas' => %w[a b a b],
3711
+ 'vals' => [1,2,3,4]
3712
+ )}
3713
+ end
3714
+ end
3715
+
3676
3716
  context "#set_index" do
3677
3717
  before(:each) do
3678
3718
  @df = Daru::DataFrame.new({
@@ -3712,6 +3752,26 @@ describe Daru::DataFrame do
3712
3752
  jholu.set_index(:a)
3713
3753
  }.to raise_error(ArgumentError)
3714
3754
  end
3755
+
3756
+ it "sets multiindex if array is given" do
3757
+ df = Daru::DataFrame.new({
3758
+ a: %w[a a b b],
3759
+ b: [1, 2, 1, 2],
3760
+ c: %w[a b c d]
3761
+ })
3762
+ df.set_index(%i[a b])
3763
+ expected =
3764
+ Daru::DataFrame.new(
3765
+ { c: %w[a b c d] },
3766
+ index: Daru::MultiIndex.from_tuples(
3767
+ [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
3768
+ )
3769
+ ).tap do |df|
3770
+ df.index.name = %i[a b]
3771
+ df
3772
+ end
3773
+ expect(df).to eq(expected)
3774
+ end
3715
3775
  end
3716
3776
 
3717
3777
  context "#concat" do
@@ -4037,6 +4097,56 @@ describe Daru::DataFrame do
4037
4097
  end
4038
4098
  end
4039
4099
 
4100
+ context '#access_row_tuples_by_indexs' do
4101
+ let(:df) {
4102
+ Daru::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
4103
+ let(:df_idx) {
4104
+ Daru::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
4105
+ }
4106
+ let (:mi_idx) do
4107
+ Daru::MultiIndex.from_tuples [
4108
+ [:a,:one,:bar],
4109
+ [:a,:one,:baz],
4110
+ [:b,:two,:bar],
4111
+ [:a,:two,:baz],
4112
+ ]
4113
+ end
4114
+ let (:df_mi) do
4115
+ Daru::DataFrame.new({
4116
+ a: 1..4,
4117
+ b: 'a'..'d'
4118
+ }, index: mi_idx )
4119
+ end
4120
+ context 'when no index is given' do
4121
+ it 'returns empty Array' do
4122
+ expect(df.access_row_tuples_by_indexs()).to eq([])
4123
+ end
4124
+ end
4125
+ context 'when index(s) are given' do
4126
+ it 'returns Array of row tuples' do
4127
+ expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
4128
+ expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
4129
+ end
4130
+ end
4131
+ context 'when custom index(s) are given' do
4132
+ it 'returns Array of row tuples' do
4133
+ expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
4134
+ [[52, 1], [7, 3]]
4135
+ )
4136
+ end
4137
+ end
4138
+ context 'when multi index is given' do
4139
+ it 'returns Array of row tuples' do
4140
+ expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
4141
+ [[1, "a"], [2, "b"], [4, "d"]]
4142
+ )
4143
+ expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
4144
+ [[2, "b"]]
4145
+ )
4146
+ end
4147
+ end
4148
+ end
4149
+
4040
4150
  context '#aggregate' do
4041
4151
  let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4042
4152
  let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
@@ -388,4 +388,30 @@ describe Daru::Index do
388
388
  end
389
389
 
390
390
  end
391
+
392
+ context '#to_df' do
393
+ let(:idx) do
394
+ Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
395
+ name: 'instruments')
396
+ end
397
+ subject { idx.to_df }
398
+
399
+ it { is_expected.to eq Daru::DataFrame.new(
400
+ 'instruments' => ['speaker', 'mic', 'guitar', 'amp']
401
+ )
402
+ }
403
+ end
404
+
405
+ context "#dup" do
406
+ let(:idx) do
407
+ Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
408
+ name: 'instruments')
409
+ end
410
+ subject { idx.dup }
411
+
412
+ it { is_expected.to eq idx }
413
+ it 'have same names' do
414
+ expect(subject.name).to eq idx.name
415
+ end
416
+ end
391
417
  end
@@ -659,4 +659,22 @@ describe Daru::MultiIndex do
659
659
  it { expect(idx.valid? :a, :three).to eq false }
660
660
  end
661
661
  end
662
+
663
+ context '#to_df' do
664
+ let(:idx) do
665
+ described_class.from_tuples([
666
+ %w[a one bar],
667
+ %w[a two bar],
668
+ %w[b two baz],
669
+ %w[b one foo]
670
+ ]).tap { |idx| idx.name = %w[col1 col2 col3] }
671
+ end
672
+
673
+ subject { idx.to_df }
674
+ it { is_expected.to eq Daru::DataFrame.new(
675
+ 'col1' => %w[a a b b],
676
+ 'col2' => %w[one two two one],
677
+ 'col3' => %w[bar bar baz foo]
678
+ )}
679
+ end
662
680
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2019-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: backports
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: bundler
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.10'
69
69
  - !ruby/object:Gem::Dependency
@@ -280,16 +280,16 @@ dependencies:
280
280
  name: sqlite3
281
281
  requirement: !ruby/object:Gem::Requirement
282
282
  requirements:
283
- - - ">="
283
+ - - "~>"
284
284
  - !ruby/object:Gem::Version
285
- version: '0'
285
+ version: 1.3.13
286
286
  type: :development
287
287
  prerelease: false
288
288
  version_requirements: !ruby/object:Gem::Requirement
289
289
  requirements:
290
- - - ">="
290
+ - - "~>"
291
291
  - !ruby/object:Gem::Version
292
- version: '0'
292
+ version: 1.3.13
293
293
  - !ruby/object:Gem::Dependency
294
294
  name: rubocop
295
295
  requirement: !ruby/object:Gem::Requirement
@@ -418,6 +418,7 @@ files:
418
418
  - benchmarks/TradeoffData.csv
419
419
  - benchmarks/csv_reading.rb
420
420
  - benchmarks/dataframe_creation.rb
421
+ - benchmarks/db_loading.rb
421
422
  - benchmarks/duplicating.rb
422
423
  - benchmarks/group_by.rb
423
424
  - benchmarks/joining.rb
@@ -585,8 +586,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
585
586
  - !ruby/object:Gem::Version
586
587
  version: '0'
587
588
  requirements: []
588
- rubyforge_project:
589
- rubygems_version: 2.6.14
589
+ rubygems_version: 3.0.2
590
590
  signing_key:
591
591
  specification_version: 4
592
592
  summary: Data Analysis in RUby