daru 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -74,7 +74,7 @@ module Daru
74
74
  def ==(other)
75
75
  return false if self.class != other.class || other.size != @size
76
76
 
77
- @relation_hash.keys == other.to_a &&
77
+ @keys == other.to_a &&
78
78
  @relation_hash.values == other.relation_hash.values
79
79
  end
80
80
 
@@ -201,7 +201,7 @@ module Daru
201
201
  end
202
202
 
203
203
  def to_a
204
- @relation_hash.keys
204
+ @keys
205
205
  end
206
206
 
207
207
  def key(value)
@@ -230,16 +230,16 @@ module Daru
230
230
  # # 3 false
231
231
  # # 4 true
232
232
  def is_values(*indexes) # rubocop:disable Style/PredicateName
233
- bool_array = @relation_hash.keys.map { |r| indexes.include?(r) }
233
+ bool_array = @keys.map { |r| indexes.include?(r) }
234
234
  Daru::Vector.new(bool_array)
235
235
  end
236
236
 
237
237
  def empty?
238
- @relation_hash.empty?
238
+ @size.zero?
239
239
  end
240
240
 
241
241
  def dup
242
- Daru::Index.new @relation_hash.keys
242
+ Daru::Index.new @keys, name: @name
243
243
  end
244
244
 
245
245
  def add *indexes
@@ -285,15 +285,17 @@ module Daru
285
285
  # di.sort #=> Daru::Index.new [1, 2, 99, 100, 101]
286
286
  def sort opts={}
287
287
  opts = {ascending: true}.merge(opts)
288
- if opts[:ascending]
289
- new_index, = @relation_hash.sort.transpose
290
- else
291
- new_index, = @relation_hash.sort.reverse.transpose
292
- end
288
+
289
+ new_index = @keys.sort
290
+ new_index = new_index.reverse unless opts[:ascending]
293
291
 
294
292
  self.class.new(new_index)
295
293
  end
296
294
 
295
+ def to_df
296
+ Daru::DataFrame.new(name => to_a)
297
+ end
298
+
297
299
  private
298
300
 
299
301
  def guess_index index
@@ -342,7 +344,6 @@ module Daru
342
344
 
343
345
  # Raises IndexError when one of the positions is an invalid position
344
346
  def validate_positions *positions
345
- positions = [positions] if positions.is_a? Integer
346
347
  positions.each do |pos|
347
348
  raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
348
349
  end
@@ -9,6 +9,7 @@ module Daru
9
9
  end
10
10
 
11
11
  attr_reader :labels
12
+ attr_reader :name
12
13
 
13
14
  def levels
14
15
  @levels.map(&:keys)
@@ -19,7 +20,7 @@ module Daru
19
20
  # If the user doesn't want to name a particular level, the user must put
20
21
  # empty string in that index of Array `name`.
21
22
  # For example, if there is a multi_index of 3 levels and the user doesn't want to name
22
- # level 0, then do mulit_index.name = ['', 'level1_name1', 'level2_name']
23
+ # level 0, then do multi_index.name = ['', 'level1_name1', 'level2_name']
23
24
  #
24
25
  # @example
25
26
  #
@@ -196,12 +197,12 @@ module Daru
196
197
  end
197
198
 
198
199
  def add *indexes
199
- Daru::MultiIndex.from_tuples to_a << indexes
200
+ Daru::MultiIndex.from_tuples(to_a + [indexes])
200
201
  end
201
202
 
202
203
  def reorder(new_order)
203
204
  from = to_a
204
- self.class.from_tuples(new_order.map { |i| from[i] })
205
+ MultiIndex.from_tuples(new_order.map { |i| from[i] })
205
206
  end
206
207
 
207
208
  def try_retrieve_from_integer int
@@ -365,5 +366,9 @@ module Daru
365
366
  [nil] * (cur.size - left.size) + left.map(&:first)
366
367
  }
367
368
  end
369
+
370
+ def to_df
371
+ Daru::DataFrame.new(@name.zip(to_a.transpose).to_h)
372
+ end
368
373
  end
369
374
  end
@@ -34,7 +34,7 @@ module Daru
34
34
  end
35
35
  end
36
36
 
37
- module IO # rubocop:disable Metrics/ModuleLength
37
+ module IO
38
38
  class << self
39
39
  # Functions for loading/writing Excel files.
40
40
 
@@ -134,23 +134,11 @@ module Daru
134
134
  #
135
135
  # @return A dataframe containing the data in the given relation
136
136
  def from_activerecord(relation, *fields)
137
- if fields.empty?
138
- records = relation.map do |record|
139
- record.attributes.symbolize_keys
140
- end
141
- return Daru::DataFrame.new(records)
142
- else
143
- fields = fields.map(&:to_sym)
144
- end
137
+ fields = relation.klass.column_names if fields.empty?
138
+ fields = fields.map(&:to_sym)
145
139
 
146
- vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h
147
-
148
- Daru::DataFrame.new(vectors, order: fields).tap do |df|
149
- relation.pluck(*fields).each do |record|
150
- df.add_row(Array(record))
151
- end
152
- df.update
153
- end
140
+ result = relation.pluck(*fields).transpose
141
+ Daru::DataFrame.new(result, order: fields).tap(&:update)
154
142
  end
155
143
 
156
144
  # Loading data from plain text files
@@ -1,5 +1,5 @@
1
1
  <b> Daru::DataFrame<%= name ? ": #{name} " : ''%>(<%=nrows%>x<%=ncols%>) </b>
2
- <table>
2
+ <table border="1" class="dataframe">
3
3
  <%= table_thead %>
4
4
  <%= table_tbody %>
5
5
  </table>
@@ -151,8 +151,6 @@ module Daru
151
151
  attr_accessor :labels
152
152
  # Store vector data in an array
153
153
  attr_reader :data
154
- # Ploting library being used for this vector
155
- attr_reader :plotting_library
156
154
  # TODO: Make private.
157
155
  attr_reader :nil_positions, :nan_positions
158
156
 
@@ -197,6 +195,13 @@ module Daru
197
195
  end
198
196
  end
199
197
 
198
+ # attr_reader for :plotting_library
199
+ def plotting_library
200
+ init_plotting_library
201
+
202
+ @plotting_library
203
+ end
204
+
200
205
  def plotting_library= lib
201
206
  case lib
202
207
  when :gruff, :nyaplot
@@ -207,11 +212,18 @@ module Daru
207
212
  )
208
213
  end
209
214
  else
210
- raise ArguementError, "Plotting library #{lib} not supported. "\
215
+ raise ArgumentError, "Plotting library #{lib} not supported. "\
211
216
  'Supported libraries are :nyaplot and :gruff'
212
217
  end
213
218
  end
214
219
 
220
+ # this method is overwritten: see Daru::Vector#plotting_library=
221
+ def plot(*args, **options, &b)
222
+ init_plotting_library
223
+
224
+ plot(*args, **options, &b)
225
+ end
226
+
215
227
  # Get one or more elements with specified index or a range.
216
228
  #
217
229
  # == Usage
@@ -1481,6 +1493,11 @@ module Daru
1481
1493
 
1482
1494
  private
1483
1495
 
1496
+ # Will lazily load the plotting library being used for this vector
1497
+ def init_plotting_library
1498
+ self.plotting_library = Daru.plotting_library
1499
+ end
1500
+
1484
1501
  def copy(values)
1485
1502
  # Make sure values is right-justified to the size of the vector
1486
1503
  values.concat([nil] * (size-values.size)) if values.size < size
@@ -1514,8 +1531,6 @@ module Daru
1514
1531
  guard_sizes!
1515
1532
 
1516
1533
  @possibly_changed_type = true
1517
- # Include plotting functionality
1518
- self.plotting_library = Daru.plotting_library
1519
1534
  end
1520
1535
 
1521
1536
  def parse_source source, opts
@@ -1598,7 +1613,6 @@ module Daru
1598
1613
 
1599
1614
  # Raises IndexError when one of the positions is an invalid position
1600
1615
  def validate_positions *positions
1601
- positions = [positions] if positions.is_a? Integer
1602
1616
  positions.each do |pos|
1603
1617
  raise IndexError, "#{pos} is not a valid position." if pos >= size
1604
1618
  end
@@ -1,3 +1,3 @@
1
1
  module Daru
2
- VERSION = '0.2.1'.freeze
2
+ VERSION = '0.2.2'.freeze
3
3
  end
@@ -620,9 +620,14 @@ describe Daru::Core::GroupBy do
620
620
  end
621
621
 
622
622
  it 'works as older methods' do
623
- newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
624
623
  older_way = spending_df.group_by([:year, :category]).sum
624
+
625
+ newer_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending: :sum)
625
626
  expect(newer_way).to eq(older_way)
627
+
628
+ contrived_way = spending_df.group_by([:year, :category]).aggregate(spending: :sum, nb_spending_lambda: ->(df) { df[:nb_spending].sum })
629
+ contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
630
+ expect(contrived_way).to eq(older_way)
626
631
  end
627
632
 
628
633
  context 'can aggregate on MultiIndex' do
@@ -2784,6 +2784,11 @@ describe Daru::DataFrame do
2784
2784
  :a => [1,2,3,4,5]
2785
2785
  }, order: [:b, 'a', :a]))
2786
2786
  end
2787
+
2788
+ it 'raises ArgumentError if argument was not an index' do
2789
+ df = Daru::DataFrame.new([])
2790
+ expect { df.reindex_vectors([]) }.to raise_error(ArgumentError)
2791
+ end
2787
2792
  end
2788
2793
 
2789
2794
  context "#to_matrix" do
@@ -3673,6 +3678,41 @@ describe Daru::DataFrame do
3673
3678
  end
3674
3679
  end
3675
3680
 
3681
+ context '#reset_index' do
3682
+ context 'when Index' do
3683
+ subject do
3684
+ Daru::DataFrame.new(
3685
+ {'vals' => [1,2,3,4,5]},
3686
+ index: Daru::Index.new(%w[a b c d e], name: 'indices')
3687
+ ).reset_index
3688
+ end
3689
+
3690
+ it { is_expected.to eq Daru::DataFrame.new(
3691
+ 'indices' => %w[a b c d e],
3692
+ 'vals' => [1,2,3,4,5]
3693
+ )}
3694
+ end
3695
+
3696
+ context 'when MultiIndex' do
3697
+ subject do
3698
+ mi = Daru::MultiIndex.from_tuples([
3699
+ [0, 'a'], [0, 'b'], [1, 'a'], [1, 'b']
3700
+ ])
3701
+ mi.name = %w[nums alphas]
3702
+ Daru::DataFrame.new(
3703
+ {'vals' => [1,2,3,4]},
3704
+ index: mi
3705
+ ).reset_index
3706
+ end
3707
+
3708
+ it { is_expected.to eq Daru::DataFrame.new(
3709
+ 'nums' => [0,0,1,1],
3710
+ 'alphas' => %w[a b a b],
3711
+ 'vals' => [1,2,3,4]
3712
+ )}
3713
+ end
3714
+ end
3715
+
3676
3716
  context "#set_index" do
3677
3717
  before(:each) do
3678
3718
  @df = Daru::DataFrame.new({
@@ -3712,6 +3752,26 @@ describe Daru::DataFrame do
3712
3752
  jholu.set_index(:a)
3713
3753
  }.to raise_error(ArgumentError)
3714
3754
  end
3755
+
3756
+ it "sets multiindex if array is given" do
3757
+ df = Daru::DataFrame.new({
3758
+ a: %w[a a b b],
3759
+ b: [1, 2, 1, 2],
3760
+ c: %w[a b c d]
3761
+ })
3762
+ df.set_index(%i[a b])
3763
+ expected =
3764
+ Daru::DataFrame.new(
3765
+ { c: %w[a b c d] },
3766
+ index: Daru::MultiIndex.from_tuples(
3767
+ [['a', 1], ['a', 2], ['b', 1], ['b', 2]]
3768
+ )
3769
+ ).tap do |df|
3770
+ df.index.name = %i[a b]
3771
+ df
3772
+ end
3773
+ expect(df).to eq(expected)
3774
+ end
3715
3775
  end
3716
3776
 
3717
3777
  context "#concat" do
@@ -4037,6 +4097,56 @@ describe Daru::DataFrame do
4037
4097
  end
4038
4098
  end
4039
4099
 
4100
+ context '#access_row_tuples_by_indexs' do
4101
+ let(:df) {
4102
+ Daru::DataFrame.new({col: [:a, :b, :c, :d, :e], num: [52,12,07,17,01]}) }
4103
+ let(:df_idx) {
4104
+ Daru::DataFrame.new({a: [52, 12, 07], b: [1, 2, 3]}, index: [:one, :two, :three])
4105
+ }
4106
+ let (:mi_idx) do
4107
+ Daru::MultiIndex.from_tuples [
4108
+ [:a,:one,:bar],
4109
+ [:a,:one,:baz],
4110
+ [:b,:two,:bar],
4111
+ [:a,:two,:baz],
4112
+ ]
4113
+ end
4114
+ let (:df_mi) do
4115
+ Daru::DataFrame.new({
4116
+ a: 1..4,
4117
+ b: 'a'..'d'
4118
+ }, index: mi_idx )
4119
+ end
4120
+ context 'when no index is given' do
4121
+ it 'returns empty Array' do
4122
+ expect(df.access_row_tuples_by_indexs()).to eq([])
4123
+ end
4124
+ end
4125
+ context 'when index(s) are given' do
4126
+ it 'returns Array of row tuples' do
4127
+ expect(df.access_row_tuples_by_indexs(1)).to eq([[:b, 12]])
4128
+ expect(df.access_row_tuples_by_indexs(0,3)).to eq([[:a, 52], [:d, 17]])
4129
+ end
4130
+ end
4131
+ context 'when custom index(s) are given' do
4132
+ it 'returns Array of row tuples' do
4133
+ expect(df_idx.access_row_tuples_by_indexs(:one,:three)).to eq(
4134
+ [[52, 1], [7, 3]]
4135
+ )
4136
+ end
4137
+ end
4138
+ context 'when multi index is given' do
4139
+ it 'returns Array of row tuples' do
4140
+ expect(df_mi.access_row_tuples_by_indexs(:a)).to eq(
4141
+ [[1, "a"], [2, "b"], [4, "d"]]
4142
+ )
4143
+ expect(df_mi.access_row_tuples_by_indexs(:a, :one, :baz)).to eq(
4144
+ [[2, "b"]]
4145
+ )
4146
+ end
4147
+ end
4148
+ end
4149
+
4040
4150
  context '#aggregate' do
4041
4151
  let(:cat_idx) { Daru::CategoricalIndex.new [:a, :b, :a, :a, :c] }
4042
4152
  let(:df) { Daru::DataFrame.new(num: [52,12,07,17,01], cat_index: cat_idx) }
@@ -388,4 +388,30 @@ describe Daru::Index do
388
388
  end
389
389
 
390
390
  end
391
+
392
+ context '#to_df' do
393
+ let(:idx) do
394
+ Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
395
+ name: 'instruments')
396
+ end
397
+ subject { idx.to_df }
398
+
399
+ it { is_expected.to eq Daru::DataFrame.new(
400
+ 'instruments' => ['speaker', 'mic', 'guitar', 'amp']
401
+ )
402
+ }
403
+ end
404
+
405
+ context "#dup" do
406
+ let(:idx) do
407
+ Daru::Index.new(['speaker', 'mic', 'guitar', 'amp'],
408
+ name: 'instruments')
409
+ end
410
+ subject { idx.dup }
411
+
412
+ it { is_expected.to eq idx }
413
+ it 'have same names' do
414
+ expect(subject.name).to eq idx.name
415
+ end
416
+ end
391
417
  end
@@ -659,4 +659,22 @@ describe Daru::MultiIndex do
659
659
  it { expect(idx.valid? :a, :three).to eq false }
660
660
  end
661
661
  end
662
+
663
+ context '#to_df' do
664
+ let(:idx) do
665
+ described_class.from_tuples([
666
+ %w[a one bar],
667
+ %w[a two bar],
668
+ %w[b two baz],
669
+ %w[b one foo]
670
+ ]).tap { |idx| idx.name = %w[col1 col2 col3] }
671
+ end
672
+
673
+ subject { idx.to_df }
674
+ it { is_expected.to eq Daru::DataFrame.new(
675
+ 'col1' => %w[a a b b],
676
+ 'col2' => %w[one two two one],
677
+ 'col3' => %w[bar bar baz foo]
678
+ )}
679
+ end
662
680
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-02 00:00:00.000000000 Z
11
+ date: 2019-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: backports
@@ -56,14 +56,14 @@ dependencies:
56
56
  name: bundler
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.10'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.10'
69
69
  - !ruby/object:Gem::Dependency
@@ -280,16 +280,16 @@ dependencies:
280
280
  name: sqlite3
281
281
  requirement: !ruby/object:Gem::Requirement
282
282
  requirements:
283
- - - ">="
283
+ - - "~>"
284
284
  - !ruby/object:Gem::Version
285
- version: '0'
285
+ version: 1.3.13
286
286
  type: :development
287
287
  prerelease: false
288
288
  version_requirements: !ruby/object:Gem::Requirement
289
289
  requirements:
290
- - - ">="
290
+ - - "~>"
291
291
  - !ruby/object:Gem::Version
292
- version: '0'
292
+ version: 1.3.13
293
293
  - !ruby/object:Gem::Dependency
294
294
  name: rubocop
295
295
  requirement: !ruby/object:Gem::Requirement
@@ -418,6 +418,7 @@ files:
418
418
  - benchmarks/TradeoffData.csv
419
419
  - benchmarks/csv_reading.rb
420
420
  - benchmarks/dataframe_creation.rb
421
+ - benchmarks/db_loading.rb
421
422
  - benchmarks/duplicating.rb
422
423
  - benchmarks/group_by.rb
423
424
  - benchmarks/joining.rb
@@ -585,8 +586,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
585
586
  - !ruby/object:Gem::Version
586
587
  version: '0'
587
588
  requirements: []
588
- rubyforge_project:
589
- rubygems_version: 2.6.14
589
+ rubygems_version: 3.0.2
590
590
  signing_key:
591
591
  specification_version: 4
592
592
  summary: Data Analysis in RUby