daru 0.1.4.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -0
  3. data/.travis.yml +3 -0
  4. data/CONTRIBUTING.md +27 -3
  5. data/Guardfile +7 -0
  6. data/History.md +39 -1
  7. data/README.md +1 -1
  8. data/daru.gemspec +9 -2
  9. data/lib/daru.rb +4 -1
  10. data/lib/daru/accessors/gsl_wrapper.rb +93 -91
  11. data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
  12. data/lib/daru/category.rb +22 -15
  13. data/lib/daru/core/group_by.rb +13 -2
  14. data/lib/daru/core/merge.rb +37 -31
  15. data/lib/daru/core/query.rb +10 -2
  16. data/lib/daru/dataframe.rb +95 -34
  17. data/lib/daru/date_time/index.rb +15 -16
  18. data/lib/daru/date_time/offsets.rb +14 -11
  19. data/lib/daru/formatters/table.rb +2 -2
  20. data/lib/daru/index/categorical_index.rb +201 -0
  21. data/lib/daru/index/index.rb +289 -0
  22. data/lib/daru/index/multi_index.rb +266 -0
  23. data/lib/daru/maths/statistics/vector.rb +13 -9
  24. data/lib/daru/monkeys.rb +0 -7
  25. data/lib/daru/plotting/gruff/category.rb +1 -0
  26. data/lib/daru/plotting/gruff/dataframe.rb +3 -3
  27. data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
  28. data/lib/daru/vector.rb +36 -21
  29. data/lib/daru/version.rb +1 -1
  30. data/spec/accessors/array_wrapper_spec.rb +3 -0
  31. data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
  32. data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
  33. data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
  34. data/spec/core/group_by_spec.rb +17 -1
  35. data/spec/core/merge_spec.rb +38 -1
  36. data/spec/core/query_spec.rb +5 -0
  37. data/spec/dataframe_spec.rb +230 -57
  38. data/spec/date_time/offsets_spec.rb +84 -3
  39. data/spec/formatters/table_formatter_spec.rb +9 -0
  40. data/spec/index/categorical_index_spec.rb +2 -0
  41. data/spec/index/index_spec.rb +17 -2
  42. data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
  43. data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
  44. data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
  45. data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
  46. data/spec/plotting/gruff/category_spec.rb +44 -0
  47. data/spec/plotting/gruff/dataframe_spec.rb +84 -0
  48. data/spec/plotting/gruff/vector_spec.rb +70 -0
  49. data/spec/plotting/nyaplot/category_spec.rb +51 -0
  50. data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
  51. data/spec/plotting/nyaplot/vector_spec.rb +66 -0
  52. data/spec/spec_helper.rb +3 -2
  53. data/spec/vector_spec.rb +68 -1
  54. metadata +53 -24
  55. data/lib/daru/index.rb +0 -761
  56. data/spec/plotting/vector_spec.rb +0 -230
@@ -301,86 +301,3 @@ describe Daru::DataFrame, 'category plotting' do
301
301
  it { expect { df.plot(type: :box, categorized: {by: :c, method: :color}) }.to raise_error ArgumentError }
302
302
  end
303
303
  end
304
-
305
- describe Daru::DataFrame, 'plotting dataframe using gruff' do
306
- before { Daru.plotting_library = :gruff }
307
- let(:df) do
308
- Daru::DataFrame.new({
309
- a: [1, 3, 5, 2, 5, 0],
310
- b: [1, 5, 2, 5, 1, 0],
311
- c: [1, 6, 7, 2, 6, 0]
312
- }, index: 'a'..'f')
313
- end
314
-
315
- context 'bar' do
316
- let(:plot) { instance_double 'Gruff::Bar' }
317
- before { allow(Gruff::Bar).to receive(:new).and_return(plot) }
318
- it 'plots bar graph' do
319
- expect(plot).to receive :labels=
320
- expect(plot).to receive(:data).exactly(3).times
321
- df.plot type: :bar
322
- end
323
-
324
- it 'plots bar graph with block' do
325
- expect(plot).to receive :labels=
326
- expect(plot).to receive(:data).exactly(3).times
327
- expect(plot).to receive :title=
328
- df.plot(type: :bar) { |p| p.title = 'hello' }
329
- end
330
-
331
- it 'plots with specified columns' do
332
- expect(plot).to receive :labels=
333
- expect(plot).to receive(:data).exactly(2).times
334
- df.plot type: :bar, y: [:a, :b]
335
- end
336
- end
337
-
338
- context 'line' do
339
- let(:plot) { instance_double 'Gruff::Line' }
340
- before { allow(Gruff::Line).to receive(:new).and_return(plot) }
341
- it 'plots line graph' do
342
- expect(plot).to receive :labels=
343
- expect(plot).to receive(:data).exactly(3).times
344
- df.plot type: :line
345
- end
346
- end
347
-
348
- context 'scatter' do
349
- let(:plot) { instance_double 'Gruff::Scatter' }
350
- before { allow(Gruff::Scatter).to receive(:new).and_return(plot) }
351
- it 'plots scatter graph' do
352
- expect(plot).to receive(:data).exactly(3).times
353
- df.plot type: :scatter
354
- end
355
-
356
- it 'plots with specified columns' do
357
- expect(plot).to receive(:data).exactly(1).times
358
- df.plot type: :scatter, x: :c, y: :a
359
- end
360
- end
361
-
362
- context 'invalid type' do
363
- it { expect { df.plot type: :lol }.to raise_error ArgumentError }
364
- end
365
- end
366
-
367
- describe Daru::DataFrame, 'dataframe category plotting with gruff' do
368
- before { Daru.plotting_library = :gruff }
369
- let(:df) do
370
- Daru::DataFrame.new({
371
- a: [1, 3, 5, 2, 5, 0],
372
- b: [1, 5, 2, 5, 1, 0],
373
- c: [:a, :b, :a, :a, :b, :a]
374
- }, index: 'a'..'f')
375
- end
376
- before { df.to_category :c }
377
-
378
- context 'scatter' do
379
- let(:plot) { instance_double 'Gruff::Scatter' }
380
- before { allow(Gruff::Scatter).to receive(:new).and_return(plot) }
381
- it 'plots scatter plot categorized by category vector' do
382
- expect(plot).to receive(:data).exactly(2).times
383
- df.plot type: :scatter, x: :a, y: :b, categorized: { by: :c }
384
- end
385
- end
386
- end
@@ -0,0 +1,66 @@
1
+ require 'spec_helper.rb'
2
+
3
+ describe Daru::Vector, 'plotting' do
4
+ let(:vector) { Daru::Vector.new([11, 22, 33], index: [:a, :b, :c]) }
5
+ let(:plot) { instance_double('Nyaplot::Plot') }
6
+ let(:diagram) { instance_double('Nyaplot::Diagram') }
7
+
8
+ before do
9
+ Daru.plotting_library = :nyaplot
10
+ allow(Nyaplot::Plot).to receive(:new).and_return(plot)
11
+ end
12
+
13
+ it 'plots the vector' do
14
+ expect(plot).to receive(:add).with(:box, [11, 22, 33]).ordered
15
+ expect(plot).to receive(:show).ordered
16
+
17
+ vector.plot(type: :box)
18
+ end
19
+
20
+ context 'scatter' do
21
+ it 'is default type' do
22
+ expect(plot).to receive(:add).with(:scatter, instance_of(Array), instance_of(Array)).ordered
23
+ expect(plot).to receive(:show).ordered
24
+
25
+ vector.plot
26
+ end
27
+
28
+ it 'sets x_axis to 0...size' do
29
+ expect(plot).to receive(:add).with(:scatter, [0, 1, 2], [11, 22, 33]).ordered
30
+ expect(plot).to receive(:show).ordered
31
+
32
+ vector.plot(type: :scatter)
33
+ end
34
+ end
35
+
36
+ [:box, :histogram].each do |type|
37
+ context type.to_s do
38
+ it 'does not set x axis' do
39
+ expect(plot).to receive(:add).with(type, [11, 22, 33]).ordered
40
+ expect(plot).to receive(:show).ordered
41
+
42
+ vector.plot(type: type)
43
+ end
44
+ end
45
+ end
46
+
47
+ [:bar, :line].each do |type| # FIXME: what other types 2D plot could have?..
48
+ context type.to_s do
49
+ it 'sets x axis to index' do
50
+ expect(plot).to receive(:add).with(type, [:a, :b, :c], [11, 22, 33]).ordered
51
+ expect(plot).to receive(:show).ordered
52
+
53
+ vector.plot(type: type)
54
+ end
55
+ end
56
+ end
57
+
58
+ context 'with block provided' do
59
+ it 'yields plot and diagram' do
60
+ expect(plot).to receive(:add).with(:box, [11, 22, 33]).ordered.and_return(diagram)
61
+ expect(plot).to receive(:show).ordered
62
+
63
+ expect { |b| vector.plot(type: :box, &b) }.to yield_with_args(plot, diagram)
64
+ end
65
+ end
66
+ end
@@ -26,8 +26,9 @@ RSpec::Expectations.configuration.warn_about_potential_false_positives = false
26
26
 
27
27
  require 'simplecov'
28
28
  SimpleCov.start do
29
- add_filter 'spec'
30
- minimum_coverage_by_file 95
29
+ add_filter 'vendor'
30
+ add_filter 'spec'
31
+ minimum_coverage_by_file 95
31
32
  end
32
33
 
33
34
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
@@ -1,3 +1,5 @@
1
+ require 'spec_helper.rb'
2
+
1
3
  describe Daru::Vector do
2
4
  ALL_DTYPES.each do |dtype|
3
5
  describe dtype.to_s do
@@ -84,6 +86,30 @@ describe Daru::Vector do
84
86
 
85
87
  end
86
88
 
89
+ context "#reorder!" do
90
+ let(:vector_with_dtype) do
91
+ Daru::Vector.new(
92
+ [1, 2, 3, 4],
93
+ index: [:a, :b, :c, :d],
94
+ dtype: dtype)
95
+ end
96
+ let(:arranged_vector) do
97
+ Daru::Vector.new([4,3,2,1], index: [:d, :c, :b, :a], dtype: dtype)
98
+ end
99
+
100
+ before do
101
+ vector_with_dtype.reorder! [3, 2, 1, 0]
102
+ end
103
+
104
+ it "rearranges with passed order" do
105
+ expect(vector_with_dtype).to eq arranged_vector
106
+ end
107
+
108
+ it "doesn't change dtype" do
109
+ expect(vector_with_dtype.data.class).to eq arranged_vector.data.class
110
+ end
111
+ end
112
+
87
113
  context ".new_with_size" do
88
114
  it "creates new vector from only size" do
89
115
  v1 = Daru::Vector.new 10.times.map { nil }, dtype: dtype
@@ -1177,10 +1203,25 @@ describe Daru::Vector do
1177
1203
  expect(@vector['2012-1-1']).to eq(1)
1178
1204
  end
1179
1205
 
1206
+ it "accepts an array as index" do
1207
+ @vector.index = [5,4,3,2,1]
1208
+
1209
+ expect(@vector.index.class).to eq(Daru::Index)
1210
+ expect(@vector[5]).to eq(1)
1211
+ end
1212
+
1213
+ it "accepts an range as index" do
1214
+ @vector.index = 'a'..'e'
1215
+
1216
+ expect(@vector.index.class).to eq(Daru::Index)
1217
+ expect(@vector['a']).to eq(1)
1218
+ end
1219
+
1180
1220
  it "raises error for index size != vector size" do
1181
1221
  expect {
1182
1222
  @vector.index = Daru::Index.new([4,2,6])
1183
- }.to raise_error
1223
+ }.to raise_error(ArgumentError, 'Size of supplied index 3 '\
1224
+ 'does not match size of Vector')
1184
1225
  end
1185
1226
  end
1186
1227
 
@@ -1882,6 +1923,32 @@ describe Daru::Vector do
1882
1923
  expect(lag2[lag2.size - 2]).to be_within(0.001).of(16.56)
1883
1924
  end
1884
1925
  end
1926
+
1927
+ context "#group_by" do
1928
+ let(:dv) { Daru::Vector.new [:a, :b, :a, :b, :c] }
1929
+
1930
+ context 'vector not specified' do
1931
+ subject { dv.group_by }
1932
+
1933
+ it { is_expected.to be_a Daru::Core::GroupBy }
1934
+ its(:'groups.size') { is_expected.to eq 3 }
1935
+ its(:groups) { is_expected.to eq({[:a]=>[0, 2], [:b]=>[1, 3], [:c]=>[4]}) }
1936
+ end
1937
+
1938
+ context 'vector name specified' do
1939
+ before { dv.name = :hello }
1940
+ subject { dv.group_by :hello }
1941
+
1942
+ it { is_expected.to be_a Daru::Core::GroupBy }
1943
+ its(:'groups.size') { is_expected.to eq 3 }
1944
+ its(:groups) { is_expected.to eq({[:a]=>[0, 2], [:b]=>[1, 3], [:c]=>[4]}) }
1945
+ end
1946
+
1947
+ context 'vector name invalid' do
1948
+ before { dv.name = :hello }
1949
+ it { expect { dv.group_by :abc }.to raise_error }
1950
+ end
1951
+ end
1885
1952
 
1886
1953
  context '#method_missing' do
1887
1954
  context 'getting' do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: daru
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4.1
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sameer Deshmukh
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-08-19 00:00:00.000000000 Z
11
+ date: 2017-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: backports
@@ -207,19 +207,19 @@ dependencies:
207
207
  - !ruby/object:Gem::Version
208
208
  version: '0.7'
209
209
  - !ruby/object:Gem::Dependency
210
- name: rb-gsl
210
+ name: gsl
211
211
  requirement: !ruby/object:Gem::Requirement
212
212
  requirements:
213
213
  - - "~>"
214
214
  - !ruby/object:Gem::Version
215
- version: '1.16'
215
+ version: 2.1.0.2
216
216
  type: :development
217
217
  prerelease: false
218
218
  version_requirements: !ruby/object:Gem::Requirement
219
219
  requirements:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
- version: '1.16'
222
+ version: 2.1.0.2
223
223
  - !ruby/object:Gem::Dependency
224
224
  name: dbd-sqlite3
225
225
  requirement: !ruby/object:Gem::Requirement
@@ -318,6 +318,20 @@ dependencies:
318
318
  - - ">="
319
319
  - !ruby/object:Gem::Version
320
320
  version: '0'
321
+ - !ruby/object:Gem::Dependency
322
+ name: gruff
323
+ requirement: !ruby/object:Gem::Requirement
324
+ requirements:
325
+ - - ">="
326
+ - !ruby/object:Gem::Version
327
+ version: '0'
328
+ type: :development
329
+ prerelease: false
330
+ version_requirements: !ruby/object:Gem::Requirement
331
+ requirements:
332
+ - - ">="
333
+ - !ruby/object:Gem::Version
334
+ version: '0'
321
335
  - !ruby/object:Gem::Dependency
322
336
  name: nokogiri
323
337
  requirement: !ruby/object:Gem::Requirement
@@ -333,7 +347,7 @@ dependencies:
333
347
  - !ruby/object:Gem::Version
334
348
  version: '0'
335
349
  - !ruby/object:Gem::Dependency
336
- name: gruff
350
+ name: guard-rspec
337
351
  requirement: !ruby/object:Gem::Requirement
338
352
  requirements:
339
353
  - - ">="
@@ -366,6 +380,7 @@ files:
366
380
  - ".travis.yml"
367
381
  - CONTRIBUTING.md
368
382
  - Gemfile
383
+ - Guardfile
369
384
  - History.md
370
385
  - LICENSE
371
386
  - README.md
@@ -415,7 +430,9 @@ files:
415
430
  - lib/daru/extensions/rserve.rb
416
431
  - lib/daru/formatters/table.rb
417
432
  - lib/daru/helpers/array.rb
418
- - lib/daru/index.rb
433
+ - lib/daru/index/categorical_index.rb
434
+ - lib/daru/index/index.rb
435
+ - lib/daru/index/multi_index.rb
419
436
  - lib/daru/io/io.rb
420
437
  - lib/daru/io/sql_data_source.rb
421
438
  - lib/daru/iruby/helpers.rb
@@ -445,8 +462,10 @@ files:
445
462
  - profile/joining.rb
446
463
  - profile/sorting.rb
447
464
  - profile/vector_each_with_index.rb
448
- - spec/accessors/wrappers_spec.rb
449
- - spec/categorical_spec.rb
465
+ - spec/accessors/array_wrapper_spec.rb
466
+ - spec/accessors/gsl_wrapper_spec.rb
467
+ - spec/accessors/nmatrix_wrapper_spec.rb
468
+ - spec/category_spec.rb
450
469
  - spec/core/group_by_spec.rb
451
470
  - spec/core/merge_spec.rb
452
471
  - spec/core/query_spec.rb
@@ -477,13 +496,17 @@ files:
477
496
  - spec/iruby/helpers_spec.rb
478
497
  - spec/iruby/multi_index_spec.rb
479
498
  - spec/iruby/vector_spec.rb
480
- - spec/math/arithmetic/dataframe_spec.rb
481
- - spec/math/arithmetic/vector_spec.rb
482
- - spec/math/statistics/dataframe_spec.rb
483
- - spec/math/statistics/vector_spec.rb
499
+ - spec/maths/arithmetic/dataframe_spec.rb
500
+ - spec/maths/arithmetic/vector_spec.rb
501
+ - spec/maths/statistics/dataframe_spec.rb
502
+ - spec/maths/statistics/vector_spec.rb
484
503
  - spec/monkeys_spec.rb
485
- - spec/plotting/dataframe_spec.rb
486
- - spec/plotting/vector_spec.rb
504
+ - spec/plotting/gruff/category_spec.rb
505
+ - spec/plotting/gruff/dataframe_spec.rb
506
+ - spec/plotting/gruff/vector_spec.rb
507
+ - spec/plotting/nyaplot/category_spec.rb
508
+ - spec/plotting/nyaplot/dataframe_spec.rb
509
+ - spec/plotting/nyaplot/vector_spec.rb
487
510
  - spec/shared/vector_display_spec.rb
488
511
  - spec/spec_helper.rb
489
512
  - spec/support/database_helper.rb
@@ -527,13 +550,15 @@ required_rubygems_version: !ruby/object:Gem::Requirement
527
550
  version: '0'
528
551
  requirements: []
529
552
  rubyforge_project:
530
- rubygems_version: 2.5.1
553
+ rubygems_version: 2.6.8
531
554
  signing_key:
532
555
  specification_version: 4
533
556
  summary: Data Analysis in RUby
534
557
  test_files:
535
- - spec/accessors/wrappers_spec.rb
536
- - spec/categorical_spec.rb
558
+ - spec/accessors/array_wrapper_spec.rb
559
+ - spec/accessors/gsl_wrapper_spec.rb
560
+ - spec/accessors/nmatrix_wrapper_spec.rb
561
+ - spec/category_spec.rb
537
562
  - spec/core/group_by_spec.rb
538
563
  - spec/core/merge_spec.rb
539
564
  - spec/core/query_spec.rb
@@ -564,13 +589,17 @@ test_files:
564
589
  - spec/iruby/helpers_spec.rb
565
590
  - spec/iruby/multi_index_spec.rb
566
591
  - spec/iruby/vector_spec.rb
567
- - spec/math/arithmetic/dataframe_spec.rb
568
- - spec/math/arithmetic/vector_spec.rb
569
- - spec/math/statistics/dataframe_spec.rb
570
- - spec/math/statistics/vector_spec.rb
592
+ - spec/maths/arithmetic/dataframe_spec.rb
593
+ - spec/maths/arithmetic/vector_spec.rb
594
+ - spec/maths/statistics/dataframe_spec.rb
595
+ - spec/maths/statistics/vector_spec.rb
571
596
  - spec/monkeys_spec.rb
572
- - spec/plotting/dataframe_spec.rb
573
- - spec/plotting/vector_spec.rb
597
+ - spec/plotting/gruff/category_spec.rb
598
+ - spec/plotting/gruff/dataframe_spec.rb
599
+ - spec/plotting/gruff/vector_spec.rb
600
+ - spec/plotting/nyaplot/category_spec.rb
601
+ - spec/plotting/nyaplot/dataframe_spec.rb
602
+ - spec/plotting/nyaplot/vector_spec.rb
574
603
  - spec/shared/vector_display_spec.rb
575
604
  - spec/spec_helper.rb
576
605
  - spec/support/database_helper.rb
@@ -1,761 +0,0 @@
1
- module Daru
2
- class Index
3
- include Enumerable
4
- # It so happens that overriding the .new method in a super class also
5
- # tampers with the default .new method for classes that inherit from the
6
- # super class (Index in this case). Thus we first alias the original
7
- # new method (from Object) to __new__ when the Index class is evaluated,
8
- # and then we use an inherited hook such that the old new method (from
9
- # Object) is once again the default .new for the subclass.
10
- # Refer http://blog.sidu.in/2007/12/rubys-new-as-factory.html
11
- class << self
12
- alias :__new__ :new
13
-
14
- def inherited subclass
15
- class << subclass
16
- alias :new :__new__
17
- end
18
- end
19
- end
20
-
21
- # We over-ride the .new method so that any sort of Index can be generated
22
- # from Daru::Index based on the types of arguments supplied.
23
- def self.new *args, &block
24
- # FIXME: I'm not sure this clever trick really deserves our attention.
25
- # Most of common ruby libraries just avoid it in favor of usual
26
- # factory method, like `Index.create`. When `Index.new(...).class != Index`
27
- # it just leads to confusion and surprises. - zverok, 2016-05-18
28
- source = args.first
29
-
30
- MultiIndex.try_from_tuples(source) ||
31
- DateTimeIndex.try_create(source) ||
32
- allocate.tap { |i| i.send :initialize, *args, &block }
33
- end
34
-
35
- def self.coerce maybe_index
36
- maybe_index.is_a?(Index) ? maybe_index : Daru::Index.new(maybe_index)
37
- end
38
-
39
- def each(&block)
40
- return to_enum(:each) unless block_given?
41
-
42
- @relation_hash.each_key(&block)
43
- self
44
- end
45
-
46
- def map(&block)
47
- to_a.map(&block)
48
- end
49
-
50
- attr_reader :relation_hash, :size
51
-
52
- def initialize index
53
- index =
54
- case index
55
- when nil
56
- []
57
- when Integer
58
- index.times.to_a
59
- when Enumerable
60
- index.to_a
61
- else
62
- raise ArgumentError,
63
- "Cannot create index from #{index.class} #{index.inspect}"
64
- end
65
-
66
- @relation_hash = index.each_with_index.to_h.freeze
67
- @keys = @relation_hash.keys
68
- @size = @relation_hash.size
69
- end
70
-
71
- def ==(other)
72
- return false if self.class != other.class || other.size != @size
73
-
74
- @relation_hash.keys == other.to_a &&
75
- @relation_hash.values == other.relation_hash.values
76
- end
77
-
78
- def [](key, *rest)
79
- case
80
- when key.is_a?(Range)
81
- by_range key
82
- when !rest.empty?
83
- by_multi_key key, *rest
84
- else
85
- by_single_key key
86
- end
87
- end
88
-
89
- # Returns true if all arguments are either a valid category or position
90
- # @param [Array<object>] *indexes categories or positions
91
- # @return [true, false]
92
- # @example
93
- # idx.valid? :a, 2
94
- # # => true
95
- # idx.valid? 3
96
- # # => false
97
- def valid? *indexes
98
- indexes.all? { |i| to_a.include?(i) || (i.is_a?(Numeric) && i < size) }
99
- end
100
-
101
- # Returns positions given indexes or positions
102
- # @note If the argument is both a valid index and a valid position,
103
- # it will be treated as a valid index
104
- # @param [Array<object>] *indexes indexes or positions
105
- # @example
106
- # x = Daru::Index.new [:a, :b, :c]
107
- # x.pos :a, 1
108
- # # => [0, 1]
109
- def pos *indexes
110
- indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range
111
-
112
- if indexes.size == 1
113
- self[indexes.first]
114
- else
115
- indexes.map { |index| by_single_key index }
116
- end
117
- end
118
-
119
- def subset *indexes
120
- if indexes.first.is_a? Range
121
- slice indexes.first.begin, indexes.first.end
122
- elsif include? indexes.first
123
- # Assume 'indexes' contain indexes not positions
124
- Daru::Index.new indexes
125
- else
126
- # Assume 'indexes' contain positions not indexes
127
- Daru::Index.new indexes.map { |k| key k }
128
- end
129
- end
130
-
131
- # Takes positional values and returns subset of the self
132
- # capturing the indexes at mentioned positions
133
- # @param [Array<Integer>] positional values
134
- # @return [object] index object
135
- # @example
136
- # idx = Daru::Index.new [:a, :b, :c]
137
- # idx.at 0, 1
138
- # # => #<Daru::Index(2): {a, b}>
139
- def at *positions
140
- positions = preprocess_positions(*positions)
141
- validate_positions(*positions)
142
- if positions.is_a? Integer
143
- key(positions)
144
- else
145
- self.class.new positions.map(&method(:key))
146
- end
147
- end
148
-
149
- def inspect threshold=20
150
- if size <= threshold
151
- "#<#{self.class}(#{size}): {#{to_a.join(', ')}}>"
152
- else
153
- "#<#{self.class}(#{size}): {#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
154
- end
155
- end
156
-
157
- def slice *args
158
- start = args[0]
159
- en = args[1]
160
-
161
- if start.is_a?(Integer) && en.is_a?(Integer)
162
- Index.new @keys[start..en]
163
- else
164
- start_idx = @relation_hash[start]
165
- en_idx = @relation_hash[en]
166
-
167
- Index.new @keys[start_idx..en_idx]
168
- end
169
- end
170
-
171
- # Produce new index from the set union of two indexes.
172
- def |(other)
173
- Index.new(to_a | other.to_a)
174
- end
175
-
176
- # Produce a new index from the set intersection of two indexes
177
- def & other
178
- Index.new(to_a & other.to_a)
179
- end
180
-
181
- def to_a
182
- @relation_hash.keys
183
- end
184
-
185
- def key(value)
186
- return nil unless value.is_a?(Numeric)
187
- @keys[value]
188
- end
189
-
190
- def include? index
191
- @relation_hash.key? index
192
- end
193
-
194
- def empty?
195
- @relation_hash.empty?
196
- end
197
-
198
- def dup
199
- Daru::Index.new @relation_hash.keys
200
- end
201
-
202
- def add *indexes
203
- Daru::Index.new(to_a + indexes)
204
- end
205
-
206
- def _dump(*)
207
- Marshal.dump(relation_hash: @relation_hash)
208
- end
209
-
210
- def self._load data
211
- h = Marshal.load data
212
-
213
- Daru::Index.new(h[:relation_hash].keys)
214
- end
215
-
216
- # Provide an Index for sub vector produced
217
- #
218
- # @param input_indexes [Array] the input by user to index the vector
219
- # @return [Object] the Index object for sub vector produced
220
- def conform(*)
221
- self
222
- end
223
-
224
- def reorder(new_order)
225
- from = to_a
226
- self.class.new(new_order.map { |i| from[i] })
227
- end
228
-
229
- private
230
-
231
- def preprocess_range rng
232
- start = rng.begin
233
- en = rng.end
234
-
235
- if start.is_a?(Integer) && en.is_a?(Integer)
236
- @keys[start..en]
237
- else
238
- start_idx = @relation_hash[start]
239
- en_idx = @relation_hash[en]
240
-
241
- @keys[start_idx..en_idx]
242
- end
243
- end
244
-
245
- def by_range rng
246
- slice rng.begin, rng.end
247
- end
248
-
249
- def by_multi_key *key
250
- if include? key[0]
251
- Daru::Index.new key.map { |k| k }
252
- else
253
- # Assume the user is specifying values for index not keys
254
- # Return index object having keys corresponding to values provided
255
- Daru::Index.new key.map { |k| key k }
256
- end
257
- end
258
-
259
- def by_single_key key
260
- if @relation_hash.key?(key)
261
- @relation_hash[key]
262
- elsif key.is_a?(Numeric) && key < size
263
- key
264
- else
265
- raise IndexError, "Specified index #{key.inspect} does not exist"
266
- end
267
- end
268
-
269
- # Raises IndexError when one of the positions is an invalid position
270
- def validate_positions *positions
271
- positions = [positions] if positions.is_a? Integer
272
- positions.each do |pos|
273
- raise IndexError, "#{pos} is not a valid position." if pos >= size
274
- end
275
- end
276
-
277
- # Preprocess ranges, integers and array in appropriate ways
278
- def preprocess_positions *positions
279
- if positions.size == 1
280
- case positions.first
281
- when Integer
282
- positions.first
283
- when Range
284
- size.times.to_a[positions.first]
285
- else
286
- raise ArgumentError, 'Unkown position type.'
287
- end
288
- else
289
- positions
290
- end
291
- end
292
- end # class Index
293
-
294
- class MultiIndex < Index
295
- def each(&block)
296
- to_a.each(&block)
297
- end
298
-
299
- def map(&block)
300
- to_a.map(&block)
301
- end
302
-
303
- attr_reader :labels
304
-
305
- def levels
306
- @levels.map(&:keys)
307
- end
308
-
309
- def initialize opts={}
310
- labels = opts[:labels]
311
- levels = opts[:levels]
312
- raise ArgumentError,
313
- 'Must specify both labels and levels' unless labels && levels
314
- raise ArgumentError,
315
- 'Labels and levels should be same size' if labels.size != levels.size
316
- raise ArgumentError,
317
- 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
318
-
319
- @labels = labels
320
- @levels = levels.map { |e| e.map.with_index.to_h }
321
- end
322
-
323
- def incorrect_fields?(_labels, levels)
324
- levels[0].size # FIXME: without this exact call some specs are failing
325
-
326
- levels.any? { |e| e.uniq.size != e.size }
327
- end
328
-
329
- private :incorrect_fields?
330
-
331
- def self.from_arrays arrays
332
- levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
333
-
334
- labels = arrays.each_with_index.map do |arry, level_index|
335
- level = levels[level_index]
336
- arry.map { |lvl| level.index(lvl) }
337
- end
338
-
339
- MultiIndex.new labels: labels, levels: levels
340
- end
341
-
342
- def self.from_tuples tuples
343
- from_arrays tuples.transpose
344
- end
345
-
346
- def self.try_from_tuples tuples
347
- if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
348
- from_tuples(tuples)
349
- else
350
- nil
351
- end
352
- end
353
-
354
- def [] *key
355
- key.flatten!
356
- case
357
- when key[0].is_a?(Range)
358
- retrieve_from_range(key[0])
359
- when key[0].is_a?(Integer) && key.size == 1
360
- try_retrieve_from_integer(key[0])
361
- else
362
- begin
363
- retrieve_from_tuples key
364
- rescue NoMethodError
365
- raise IndexError, "Specified index #{key.inspect} do not exist"
366
- end
367
- end
368
- end
369
-
370
- def valid? *indexes
371
- # FIXME: This is perhaps not a good method
372
- pos(*indexes)
373
- return true
374
- rescue IndexError
375
- return false
376
- end
377
-
378
- # Returns positions given indexes or positions
379
- # @note If the argument is both a valid index and a valid position,
380
- # it will be treated as a valid index
381
- # @param [Array<object>] *indexes indexes or positions
382
- # @example
383
- # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
384
- # idx.pos :a
385
- # # => [0, 1]
386
- def pos *indexes
387
- if indexes.first.is_a? Integer
388
- return indexes.first if indexes.size == 1
389
- return indexes
390
- end
391
- res = self[indexes]
392
- return res if res.is_a? Integer
393
- res.map { |i| self[i] }
394
- end
395
-
396
- def subset *indexes
397
- if indexes.first.is_a? Integer
398
- MultiIndex.from_tuples(indexes.map { |index| key(index) })
399
- else
400
- self[indexes].conform indexes
401
- end
402
- end
403
-
404
- # Takes positional values and returns subset of the self
405
- # capturing the indexes at mentioned positions
406
- # @param [Array<Integer>] positional values
407
- # @return [object] index object
408
- # @example
409
- # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
410
- # idx.at 0, 1
411
- # # => #<Daru::MultiIndex(2x2)>
412
- # # a one
413
- # # two
414
- def at *positions
415
- positions = preprocess_positions(*positions)
416
- validate_positions(*positions)
417
- if positions.is_a? Integer
418
- key(positions)
419
- else
420
- Daru::MultiIndex.from_tuples positions.map(&method(:key))
421
- end
422
- end
423
-
424
- def add *indexes
425
- Daru::MultiIndex.from_tuples to_a << indexes
426
- end
427
-
428
- def reorder(new_order)
429
- from = to_a
430
- self.class.from_tuples(new_order.map { |i| from[i] })
431
- end
432
-
433
- def try_retrieve_from_integer int
434
- @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
435
- end
436
-
437
- def retrieve_from_range range
438
- MultiIndex.from_tuples(range.map { |index| key(index) })
439
- end
440
-
441
- def retrieve_from_tuples key
442
- chosen = []
443
-
444
- key.each_with_index do |k, depth|
445
- level_index = @levels[depth][k]
446
- raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
447
- label = @labels[depth]
448
- chosen = find_all_indexes label, level_index, chosen
449
- end
450
-
451
- return chosen[0] if chosen.size == 1 && key.size == @levels.size
452
- multi_index_from_multiple_selections(chosen)
453
- end
454
-
455
- def multi_index_from_multiple_selections chosen
456
- MultiIndex.from_tuples(chosen.map { |e| key(e) })
457
- end
458
-
459
- def find_all_indexes label, level_index, chosen
460
- if chosen.empty?
461
- label.each_with_index
462
- .select { |lbl, _| lbl == level_index }.map(&:last)
463
- else
464
- chosen.keep_if { |c| label[c] == level_index }
465
- end
466
- end
467
-
468
- private :find_all_indexes, :multi_index_from_multiple_selections,
469
- :retrieve_from_range, :retrieve_from_tuples
470
-
471
- def key index
472
- raise ArgumentError,
473
- "Key #{index} is too large" if index >= @labels[0].size
474
-
475
- @labels
476
- .each_with_index
477
- .map { |label, i| @levels[i].keys[label[index]] }
478
- end
479
-
480
- def dup
481
- MultiIndex.new levels: levels.dup, labels: labels
482
- end
483
-
484
- def drop_left_level by=1
485
- MultiIndex.from_arrays to_a.transpose[by..-1]
486
- end
487
-
488
- def | other
489
- MultiIndex.from_tuples(to_a | other.to_a)
490
- end
491
-
492
- def & other
493
- MultiIndex.from_tuples(to_a & other.to_a)
494
- end
495
-
496
- def empty?
497
- @labels.flatten.empty? && @levels.all?(&:empty?)
498
- end
499
-
500
- def include? tuple
501
- return false unless tuple.is_a? Enumerable
502
- tuple.flatten.each_with_index
503
- .all? { |tup, i| @levels[i][tup] }
504
- end
505
-
506
- def size
507
- @labels[0].size
508
- end
509
-
510
- def width
511
- @levels.size
512
- end
513
-
514
- def == other
515
- self.class == other.class &&
516
- labels == other.labels &&
517
- levels == other.levels
518
- end
519
-
520
- def to_a
521
- (0...size).map { |e| key(e) }
522
- end
523
-
524
- def values
525
- Array.new(size) { |i| i }
526
- end
527
-
528
- def inspect threshold=20
529
- "#<Daru::MultiIndex(#{size}x#{width})>\n" +
530
- Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
531
- end
532
-
533
- def to_html
534
- path = File.expand_path('../iruby/templates/multi_index.html.erb', __FILE__)
535
- ERB.new(File.read(path).strip).result(binding)
536
- end
537
-
538
- # Provide a MultiIndex for sub vector produced
539
- #
540
- # @param input_indexes [Array] the input by user to index the vector
541
- # @return [Object] the MultiIndex object for sub vector produced
542
- def conform input_indexes
543
- return self if input_indexes[0].is_a? Range
544
- drop_left_level input_indexes.size
545
- end
546
-
547
- # Return tuples with nils in place of repeating values, like this:
548
- #
549
- # [:a , :bar, :one]
550
- # [nil, nil , :two]
551
- # [nil, :foo, :one]
552
- #
553
- def sparse_tuples
554
- tuples = to_a
555
- [tuples.first] + each_cons(2).map { |prev, cur|
556
- left = cur.zip(prev).drop_while { |c, p| c == p }
557
- [nil] * (cur.size - left.size) + left.map(&:first)
558
- }
559
- end
560
- end
561
-
562
- class CategoricalIndex < Index
563
- # Create a categorical index object.
564
- # @param indexes [Array<object>] array of indexes
565
- # @return [Daru::CategoricalIndex] categorical index
566
- # @example
567
- # Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
568
- # # => #<Daru::CategoricalIndex(5): {a, 1, a, 1, c}>
569
- def initialize indexes
570
- # Create a hash to map each category to positional indexes
571
- categories = indexes.each_with_index.group_by(&:first)
572
- @cat_hash = categories.map { |cat, group| [cat, group.map(&:last)] }.to_h
573
-
574
- # Map each category to a unique integer for effective storage in @array
575
- map_cat_int = categories.keys.each_with_index.to_h
576
-
577
- # To link every instance to its category,
578
- # it stores integer for every instance representing its category
579
- @array = map_cat_int.values_at(*indexes)
580
- end
581
-
582
- # Duplicates the index object and returns it
583
- # @return [Daru::CategoricalIndex] duplicated index object
584
- def dup
585
- # Improve it by initializing index by hash
586
- Daru::CategoricalIndex.new to_a
587
- end
588
-
589
- # Returns true if index or category is valid
590
- # @param index [object] the index value to look for
591
- # @return [true, false] true if index is included, false otherwise
592
- def include? index
593
- @cat_hash.include? index
594
- end
595
-
596
- # Returns array of categories
597
- # @example
598
- # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
599
- # x.categories
600
- # # => [:a, 1, :c]
601
- def categories
602
- @cat_hash.keys
603
- end
604
-
605
- # Returns positions given categories or positions
606
- # @note If an argument is not a valid category, it is treated as a position
607
- # value and returned as it is.
608
- # @param [Array<object>] *indexes categories or positions
609
- # @example
610
- # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
611
- # x.pos :a, 1
612
- # # => [0, 1, 2, 3]
613
- def pos *indexes
614
- positions = indexes.map do |index|
615
- if include? index
616
- @cat_hash[index]
617
- elsif index.is_a?(Numeric) && index < @array.size
618
- index
619
- else
620
- raise IndexError, "#{index.inspect} is neither a valid category"\
621
- ' nor a valid position'
622
- end
623
- end
624
-
625
- positions.flatten!
626
- positions.size == 1 ? positions.first : positions.sort
627
- end
628
-
629
- # Returns index value from position
630
- # @param pos [Integer] the position to look for
631
- # @return [object] category corresponding to position
632
- # @example
633
- # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
634
- # idx.index_from_pos 1
635
- # # => :b
636
- def index_from_pos pos
637
- cat_from_int @array[pos]
638
- end
639
-
640
- # Returns enumerator enumerating all index values in the order they occur
641
- # @return [Enumerator] all index values
642
- # @example
643
- # idx = Daru::CategoricalIndex.new [:a, :a, :b]
644
- # idx.each.to_a
645
- # # => [:a, :a, :b]
646
- def each
647
- return enum_for(:each) unless block_given?
648
- @array.each { |pos| yield cat_from_int pos }
649
- self
650
- end
651
-
652
- # Compares two index objects. Returns true if every instance of each category
653
- # occurs at the same position
654
- # @param [Daru::CategoricalIndex] other index object to be checked against
655
- # @return [true, false] true if other is similar to self
656
- # @example
657
- # a = Daru::CategoricalIndex.new [:a, :a, :b]
658
- # b = Daru::CategoricalIndex.new [:b, :a, :a]
659
- # a == b
660
- # # => false
661
- def == other
662
- self.class == other.class &&
663
- size == other.size &&
664
- to_h == other.to_h
665
- end
666
-
667
- # Returns all the index values
668
- # @return [Array] all index values
669
- # @example
670
- # idx = Daru::CategoricalIndex.new [:a, :b, :a]
671
- # idx.to_a
672
- def to_a
673
- each.to_a
674
- end
675
-
676
- # Returns hash table mapping category to positions at which they occur
677
- # @return [Hash] hash table mapping category to array of positions
678
- # @example
679
- # idx = Daru::CategoricalIndex.new [:a, :b, :a]
680
- # idx.to_h
681
- # # => {:a=>[0, 2], :b=>[1]}
682
- def to_h
683
- @cat_hash
684
- end
685
-
686
- # Returns size of the index object
687
- # @return [Integer] total number of instances of all categories
688
- # @example
689
- # idx = Daru::CategoricalIndex.new [:a, :b, :a]
690
- # idx.size
691
- # # => 3
692
- def size
693
- @array.size
694
- end
695
-
696
- # Returns true if index object is storing no category
697
- # @return [true, false] true if index object is empty
698
- # @example
699
- # i = Daru::CategoricalIndex.new []
700
- # # => #<Daru::CategoricalIndex(0): {}>
701
- # i.empty?
702
- # # => true
703
- def empty?
704
- @array.empty?
705
- end
706
-
707
- # Return subset given categories or positions
708
- # @param [Array<object>] *indexes categories or positions
709
- # @return [Daru::CategoricalIndex] subset of the self containing the
710
- # mentioned categories or positions
711
- # @example
712
- # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
713
- # idx.subset :a, :b
714
- # # => #<Daru::CategoricalIndex(4): {a, b, a, b}>
715
- def subset *indexes
716
- positions = pos(*indexes)
717
- new_index = positions.map { |pos| index_from_pos pos }
718
-
719
- Daru::CategoricalIndex.new new_index.flatten
720
- end
721
-
722
- # Takes positional values and returns subset of the self
723
- # capturing the categories at mentioned positions
724
- # @param [Array<Integer>] *positions positional values
725
- # @return [object] index object
726
- # @example
727
- # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
728
- # idx.at 0, 1
729
- # # => #<Daru::CategoricalIndex(2): {a, b}>
730
- def at *positions
731
- positions = preprocess_positions(*positions)
732
- validate_positions(*positions)
733
- if positions.is_a? Integer
734
- index_from_pos(positions)
735
- else
736
- Daru::CategoricalIndex.new positions.map(&method(:index_from_pos))
737
- end
738
- end
739
-
740
- # Add specified index values to the index object
741
- # @param [Array<object>] *indexes index values to add
742
- # @return [Daru::CategoricalIndex] index object with added values
743
- # @example
744
- # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
745
- # idx.add :d
746
- # # => #<Daru::CategoricalIndex(6): {a, b, a, b, c, d}>
747
- def add *indexes
748
- Daru::CategoricalIndex.new(to_a + indexes)
749
- end
750
-
751
- private
752
-
753
- def int_from_cat cat
754
- @cat_hash.keys.index cat
755
- end
756
-
757
- def cat_from_int cat
758
- @cat_hash.keys[cat]
759
- end
760
- end
761
- end