daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -37,24 +37,14 @@ module Daru
37
37
  # #=> #<DateTime: 2011-05-03T03:15:00+00:00 ((2455685j,11700s,0n),+0s,2299161j)>
38
38
  def initialize opts={}
39
39
  n = opts[:n] || 1
40
-
41
- @offset =
42
- case
43
- when opts[:secs]
44
- Offsets::Second.new(n*opts[:secs])
45
- when opts[:mins]
46
- Offsets::Minute.new(n*opts[:mins])
47
- when opts[:hours]
48
- Offsets::Hour.new(n*opts[:hours])
49
- when opts[:days]
50
- Offsets::Day.new(n*opts[:days])
51
- when opts[:weeks]
52
- Offsets::Day.new(7*n*opts[:weeks])
53
- when opts[:months]
54
- Offsets::Month.new(n*opts[:months])
55
- when opts[:years]
56
- Offsets::Year.new(n*opts[:years])
40
+ Offsets::LIST.each do |key, klass|
41
+ if opts.key?(key)
42
+ @offset = klass.new(n * opts[key])
43
+ break
57
44
  end
45
+ end
46
+
47
+ @offset = Offsets::Day.new(7*n*opts[:weeks]) if opts[:weeks]
58
48
  end
59
49
 
60
50
  # Offset a DateTime forward.
@@ -70,20 +60,46 @@ module Daru
70
60
  def - date_time
71
61
  @offset - date_time
72
62
  end
63
+
64
+ def -@
65
+ NegativeDateOffset.new(self)
66
+ end
67
+ end
68
+
69
+ class NegativeDateOffset
70
+ def initialize(offset)
71
+ @offset = offset
72
+ end
73
+
74
+ def + date_time
75
+ @offset - date_time
76
+ end
77
+
78
+ def - date_time
79
+ @offset + date_time
80
+ end
73
81
  end
74
82
 
75
83
  module Offsets
84
+ class DateOffsetType < DateOffset
85
+ def initialize n=1
86
+ @n = n
87
+ end
88
+
89
+ def freq_string
90
+ (@n == 1 ? '' : @n.to_s) + self.class::FREQ
91
+ end
92
+ end
93
+
76
94
  # Private superclass for Offsets with equal inter-frequencies.
77
95
  # @abstract
78
96
  # @private
79
- class Tick < DateOffset
97
+ class Tick < DateOffsetType
98
+ # @method initialize
80
99
  # Initialize one of the subclasses of Tick with the number of times
81
100
  # the offset should be applied, which is supplied as the argument.
82
101
  #
83
102
  # @param n [Integer] The number of times an offset should be applied.
84
- def initialize n=1
85
- @n = n
86
- end
87
103
 
88
104
  def + date_time
89
105
  date_time + @n*multiplier
@@ -102,13 +118,11 @@ module Daru
102
118
  # offset + DateTime.new(2012,5,1,4,3)
103
119
  # #=> #<DateTime: 2012-05-01T04:03:05+00:00 ((2456049j,14585s,0n),+0s,2299161j)>
104
120
  class Second < Tick
121
+ FREQ = 'S'.freeze
122
+
105
123
  def multiplier
106
124
  1.1574074074074073e-05
107
125
  end
108
-
109
- def freq_string
110
- (@n == 1 ? '' : @n.to_s) + 'S'
111
- end
112
126
  end
113
127
 
114
128
  # Create a minutes offset
@@ -119,13 +133,11 @@ module Daru
119
133
  # offset + DateTime.new(2012,5,1,4,3)
120
134
  # #=> #<DateTime: 2012-05-01T04:11:00+00:00 ((2456049j,15060s,0n),+0s,2299161j)>
121
135
  class Minute < Tick
136
+ FREQ = 'M'.freeze
137
+
122
138
  def multiplier
123
139
  0.0006944444444444445
124
140
  end
125
-
126
- def freq_string
127
- (@n == 1 ? '' : @n.to_s) + 'M'
128
- end
129
141
  end
130
142
 
131
143
  # Create an hours offset
@@ -136,13 +148,11 @@ module Daru
136
148
  # offset + DateTime.new(2012,5,1,4,3)
137
149
  # #=> #<DateTime: 2012-05-01T12:03:00+00:00 ((2456049j,43380s,0n),+0s,2299161j)>
138
150
  class Hour < Tick
151
+ FREQ = 'H'.freeze
152
+
139
153
  def multiplier
140
154
  0.041666666666666664
141
155
  end
142
-
143
- def freq_string
144
- (@n == 1 ? '' : @n.to_s) + 'H'
145
- end
146
156
  end
147
157
 
148
158
  # Create a days offset
@@ -153,13 +163,11 @@ module Daru
153
163
  # offset + DateTime.new(2012,5,1,4,3)
154
164
  # #=> #<DateTime: 2012-05-03T04:03:00+00:00 ((2456051j,14580s,0n),+0s,2299161j)>
155
165
  class Day < Tick
166
+ FREQ = 'D'.freeze
167
+
156
168
  def multiplier
157
169
  1.0
158
170
  end
159
-
160
- def freq_string
161
- (@n == 1 ? '' : @n.to_s) + 'D'
162
- end
163
171
  end
164
172
 
165
173
  # Create a months offset
@@ -170,9 +178,7 @@ module Daru
170
178
  # offset + DateTime.new(2012,5,1,4,3)
171
179
  # #=> #<DateTime: 2012-10-01T04:03:00+00:00 ((2456202j,14580s,0n),+0s,2299161j)>
172
180
  class Month < Tick
173
- def freq_string
174
- (@n == 1 ? '' : @n.to_s) + 'MONTH'
175
- end
181
+ FREQ = 'MONTH'.freeze
176
182
 
177
183
  def + date_time
178
184
  date_time >> @n
@@ -191,9 +197,7 @@ module Daru
191
197
  # offset + DateTime.new(2012,5,1,4,3)
192
198
  # #=> #<DateTime: 2014-05-01T04:03:00+00:00 ((2456779j,14580s,0n),+0s,2299161j)>
193
199
  class Year < Tick
194
- def freq_string
195
- (@n == 1 ? '' : @n.to_s) + 'YEAR'
196
- end
200
+ FREQ = 'YEAR'.freeze
197
201
 
198
202
  def + date_time
199
203
  date_time >> @n*12
@@ -247,14 +251,8 @@ module Daru
247
251
  # offset = Daru::Offsets::MonthBegin.new(2)
248
252
  # offset + DateTime.new(2012,5,5)
249
253
  # #=> #<DateTime: 2012-07-01T00:00:00+00:00 ((2456110j,0s,0n),+0s,2299161j)>
250
- class MonthBegin < DateOffset
251
- def initialize n=1
252
- @n = n
253
- end
254
-
255
- def freq_string
256
- (@n == 1 ? '' : @n.to_s) + 'MB'
257
- end
254
+ class MonthBegin < DateOffsetType
255
+ FREQ = 'MB'.freeze
258
256
 
259
257
  def + date_time
260
258
  @n.times do
@@ -288,14 +286,8 @@ module Daru
288
286
  # offset = Daru::Offsets::MonthEnd.new
289
287
  # offset + DateTime.new(2012,5,5)
290
288
  # #=> #<DateTime: 2012-05-31T00:00:00+00:00 ((2456079j,0s,0n),+0s,2299161j)>
291
- class MonthEnd < DateOffset
292
- def initialize n=1
293
- @n = n
294
- end
295
-
296
- def freq_string
297
- (@n == 1 ? '' : @n.to_s) + 'ME'
298
- end
289
+ class MonthEnd < DateOffsetType
290
+ FREQ = 'ME'.freeze
299
291
 
300
292
  def + date_time
301
293
  @n.times do
@@ -333,14 +325,8 @@ module Daru
333
325
  # offset = Daru::Offsets::YearBegin.new(3)
334
326
  # offset + DateTime.new(2012,5,5)
335
327
  # #=> #<DateTime: 2015-01-01T00:00:00+00:00 ((2457024j,0s,0n),+0s,2299161j)>
336
- class YearBegin < DateOffset
337
- def initialize n=1
338
- @n = n
339
- end
340
-
341
- def freq_string
342
- (@n == 1 ? '' : @n.to_s) + 'YB'
343
- end
328
+ class YearBegin < DateOffsetType
329
+ FREQ = 'YB'.freeze
344
330
 
345
331
  def + date_time
346
332
  DateTime.new(date_time.year + @n, 1, 1,
@@ -357,7 +343,7 @@ module Daru
357
343
  end
358
344
 
359
345
  def on_offset? date_time
360
- date_time.month == 1 and date_time.day == 1
346
+ date_time.month == 1 && date_time.day == 1
361
347
  end
362
348
  end
363
349
 
@@ -368,14 +354,8 @@ module Daru
368
354
  # offset = Daru::Offsets::YearEnd.new
369
355
  # offset + DateTime.new(2012,5,5)
370
356
  # #=> #<DateTime: 2012-12-31T00:00:00+00:00 ((2456293j,0s,0n),+0s,2299161j)>
371
- class YearEnd < DateOffset
372
- def initialize n=1
373
- @n = n
374
- end
375
-
376
- def freq_string
377
- (@n == 1 ? '' : @n.to_s) + 'YE'
378
- end
357
+ class YearEnd < DateOffsetType
358
+ FREQ = 'YE'.freeze
379
359
 
380
360
  def + date_time
381
361
  if on_offset?(date_time)
@@ -392,9 +372,18 @@ module Daru
392
372
  end
393
373
 
394
374
  def on_offset? date_time
395
- date_time.month == 12 and date_time.day == 31
375
+ date_time.month == 12 && date_time.day == 31
396
376
  end
397
377
  end
378
+
379
+ LIST = {
380
+ secs: Second,
381
+ mins: Minute,
382
+ hours: Hour,
383
+ days: Day,
384
+ months: Month,
385
+ years: Year
386
+ }.freeze
398
387
  end
399
388
 
400
389
  # rubocop:enable Style/OpMethod
@@ -0,0 +1,54 @@
1
+ module Daru
2
+ module Formatters
3
+ class Table
4
+ def self.format data, options={}
5
+ new(data, options[:headers], options[:row_headers])
6
+ .format(options[:threshold], options[:spacing])
7
+ end
8
+
9
+ def initialize(data, headers, row_headers)
10
+ @data = data || []
11
+ @headers = (headers || []).to_a
12
+ @row_headers = (row_headers || []).to_a
13
+ end
14
+
15
+ DEFAULT_SPACING = 10
16
+ DEFAULT_THRESHOLD = 15
17
+
18
+ def format threshold=nil, spacing=nil
19
+ rows = build_rows(threshold || DEFAULT_THRESHOLD)
20
+
21
+ formatter = construct_formatter rows, spacing || DEFAULT_SPACING
22
+
23
+ rows.map { |r| formatter % r }.join("\n")
24
+ end
25
+
26
+ private
27
+
28
+ def build_rows threshold # rubocop:disable Metrics/AbcSize
29
+ @row_headers.first(threshold).zip(@data).map do |(r, datarow)|
30
+ [*[r].flatten.map(&:to_s), *(datarow || []).map(&method(:pretty_to_s))]
31
+ end.tap do |rows|
32
+ unless @headers.empty?
33
+ spaces_to_add = rows.empty? ? 0 : rows.first.size - @headers.size
34
+ rows.unshift [''] * spaces_to_add + @headers.map(&:to_s)
35
+ end
36
+
37
+ rows << ['...'] * rows.first.count if @row_headers.count > threshold
38
+ end
39
+ end
40
+
41
+ def construct_formatter rows, spacing
42
+ width = rows.flatten.map(&:size).max
43
+ width = [3, width].max # not less than 'nil'
44
+ width = [width, spacing].min # not more than max width
45
+
46
+ " %#{width}.#{width}s" * rows.first.size
47
+ end
48
+
49
+ def pretty_to_s(val)
50
+ val.nil? ? 'nil' : val.to_s
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,40 @@
1
+ module Daru
2
+ module ArrayHelper
3
+ module_function
4
+
5
+ # Recode repeated values on an array, adding the number of repetition
6
+ # at the end
7
+ # Example:
8
+ # a=%w{a b c c d d d e}
9
+ # a.recode_repeated
10
+ # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
11
+ def recode_repeated(array)
12
+ return array if array.size == array.uniq.size
13
+
14
+ # create hash of { <name> => 0}
15
+ # for all names which are more than one time in array
16
+ counter = array
17
+ .group_by(&:itself)
18
+ .select { |_, g| g.size > 1 }
19
+ .map(&:first)
20
+ .collect { |n| [n, 0] }.to_h
21
+
22
+ # ...and use this hash for actual recode
23
+ array.collect do |n|
24
+ if counter.key?(n)
25
+ counter[n] += 1
26
+ new_n = '%s_%d' % [n, counter[n]]
27
+ n.is_a?(Symbol) ? new_n.to_sym : new_n
28
+ else
29
+ n
30
+ end
31
+ end
32
+ end
33
+
34
+ def array_of?(array, match)
35
+ array.is_a?(Array) &&
36
+ !array.empty? &&
37
+ array.all? { |el| match === el } # rubocop:disable Style/CaseEquality
38
+ end
39
+ end
40
+ end
@@ -21,19 +21,24 @@ module Daru
21
21
  # We over-ride the .new method so that any sort of Index can be generated
22
22
  # from Daru::Index based on the types of arguments supplied.
23
23
  def self.new *args, &block
24
- source = args[0]
25
-
26
- if source.respond_to?(:first) && source.first.is_a?(Array)
27
- Daru::MultiIndex.from_tuples source
28
- elsif source && source.is_a?(Array) && !source.empty? &&
29
- source.all? { |e| e.is_a?(DateTime) }
30
- Daru::DateTimeIndex.new(source, freq: :infer)
31
- else
24
+ # FIXME: I'm not sure this clever trick really deserves our attention.
25
+ # Most of common ruby libraries just avoid it in favor of usual
26
+ # factory method, like `Index.create`. When `Index.new(...).class != Index`
27
+ # it just leads to confusion and surprises. - zverok, 2016-05-18
28
+ source = args.first
29
+
30
+ MultiIndex.try_from_tuples(source) ||
31
+ DateTimeIndex.try_create(source) ||
32
32
  allocate.tap { |i| i.send :initialize, *args, &block }
33
- end
33
+ end
34
+
35
+ def self.coerce maybe_index
36
+ maybe_index.is_a?(Index) ? maybe_index : Daru::Index.new(maybe_index)
34
37
  end
35
38
 
36
39
  def each(&block)
40
+ return to_enum(:each) unless block_given?
41
+
37
42
  @relation_hash.each_key(&block)
38
43
  self
39
44
  end
@@ -70,36 +75,88 @@ module Daru
70
75
  @relation_hash.values == other.relation_hash.values
71
76
  end
72
77
 
73
- def [](*key)
74
- loc = key[0]
75
-
78
+ def [](key, *rest)
76
79
  case
77
- when loc.is_a?(Range)
78
- first = loc.first
79
- last = loc.last
80
-
81
- slice first, last
82
- when key.size > 1
83
- if include? key[0]
84
- Daru::Index.new key.map { |k| k }
85
- else
86
- # Assume the user is specifing values for index not keys
87
- # Return index object having keys corresponding to values provided
88
- Daru::Index.new key.map { |k| key k }
89
- end
80
+ when key.is_a?(Range)
81
+ by_range key
82
+ when !rest.empty?
83
+ by_multi_key key, *rest
90
84
  else
91
- v = @relation_hash[loc]
92
- unless v
93
- return loc if loc.is_a?(Numeric) && loc < size
94
- raise IndexError, "Specified index #{loc.inspect} does not exist"
95
- end
96
- v
85
+ by_single_key key
86
+ end
87
+ end
88
+
89
+ # Returns true if all arguments are either a valid category or position
90
+ # @param [Array<object>] *indexes categories or positions
91
+ # @return [true, false]
92
+ # @example
93
+ # idx.valid? :a, 2
94
+ # # => true
95
+ # idx.valid? 3
96
+ # # => false
97
+ def valid? *indexes
98
+ indexes.all? { |i| to_a.include?(i) || (i.is_a?(Numeric) && i < size) }
99
+ end
100
+
101
+ # Returns positions given indexes or positions
102
+ # @note If the argument is both a valid index and a valid position,
103
+ # it will be treated as a valid index
104
+ # @param [Array<object>] *indexes indexes or positions
105
+ # @example
106
+ # x = Daru::Index.new [:a, :b, :c]
107
+ # x.pos :a, 1
108
+ # # => [0, 1]
109
+ def pos *indexes
110
+ indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range
111
+
112
+ if indexes.size == 1
113
+ self[indexes.first]
114
+ else
115
+ indexes.map { |index| by_single_key index }
116
+ end
117
+ end
118
+
119
+ def subset *indexes
120
+ if indexes.first.is_a? Range
121
+ slice indexes.first.begin, indexes.first.end
122
+ elsif include? indexes.first
123
+ # Assume 'indexes' contain indexes not positions
124
+ Daru::Index.new indexes
125
+ else
126
+ # Assume 'indexes' contain positions not indexes
127
+ Daru::Index.new indexes.map { |k| key k }
128
+ end
129
+ end
130
+
131
+ # Takes positional values and returns subset of the self
132
+ # capturing the indexes at mentioned positions
133
+ # @param [Array<Integer>] positional values
134
+ # @return [object] index object
135
+ # @example
136
+ # idx = Daru::Index.new [:a, :b, :c]
137
+ # idx.at 0, 1
138
+ # # => #<Daru::Index(2): {a, b}>
139
+ def at *positions
140
+ positions = preprocess_positions(*positions)
141
+ validate_positions(*positions)
142
+ if positions.is_a? Integer
143
+ key(positions)
144
+ else
145
+ self.class.new positions.map(&method(:key))
146
+ end
147
+ end
148
+
149
+ def inspect threshold=20
150
+ if size <= threshold
151
+ "#<#{self.class}(#{size}): {#{to_a.join(', ')}}>"
152
+ else
153
+ "#<#{self.class}(#{size}): {#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
97
154
  end
98
155
  end
99
156
 
100
157
  def slice *args
101
- start = args[0]
102
- en = args[1]
158
+ start = args[0]
159
+ en = args[1]
103
160
 
104
161
  if start.is_a?(Integer) && en.is_a?(Integer)
105
162
  Index.new @keys[start..en]
@@ -118,6 +175,7 @@ module Daru
118
175
 
119
176
  # Produce a new index from the set intersection of two indexes
120
177
  def & other
178
+ Index.new(to_a & other.to_a)
121
179
  end
122
180
 
123
181
  def to_a
@@ -125,7 +183,8 @@ module Daru
125
183
  end
126
184
 
127
185
  def key(value)
128
- @relation_hash.keys[value]
186
+ return nil unless value.is_a?(Numeric)
187
+ @keys[value]
129
188
  end
130
189
 
131
190
  def include? index
@@ -140,6 +199,10 @@ module Daru
140
199
  Daru::Index.new @relation_hash.keys
141
200
  end
142
201
 
202
+ def add *indexes
203
+ Daru::Index.new(to_a + indexes)
204
+ end
205
+
143
206
  def _dump(*)
144
207
  Marshal.dump(relation_hash: @relation_hash)
145
208
  end
@@ -157,11 +220,78 @@ module Daru
157
220
  def conform(*)
158
221
  self
159
222
  end
223
+
224
+ def reorder(new_order)
225
+ from = to_a
226
+ self.class.new(new_order.map { |i| from[i] })
227
+ end
228
+
229
+ private
230
+
231
+ def preprocess_range rng
232
+ start = rng.begin
233
+ en = rng.end
234
+
235
+ if start.is_a?(Integer) && en.is_a?(Integer)
236
+ @keys[start..en]
237
+ else
238
+ start_idx = @relation_hash[start]
239
+ en_idx = @relation_hash[en]
240
+
241
+ @keys[start_idx..en_idx]
242
+ end
243
+ end
244
+
245
+ def by_range rng
246
+ slice rng.begin, rng.end
247
+ end
248
+
249
+ def by_multi_key *key
250
+ if include? key[0]
251
+ Daru::Index.new key.map { |k| k }
252
+ else
253
+ # Assume the user is specifying values for index not keys
254
+ # Return index object having keys corresponding to values provided
255
+ Daru::Index.new key.map { |k| key k }
256
+ end
257
+ end
258
+
259
+ def by_single_key key
260
+ if @relation_hash.key?(key)
261
+ @relation_hash[key]
262
+ elsif key.is_a?(Numeric) && key < size
263
+ key
264
+ else
265
+ raise IndexError, "Specified index #{key.inspect} does not exist"
266
+ end
267
+ end
268
+
269
+ # Raises IndexError when one of the positions is an invalid position
270
+ def validate_positions *positions
271
+ positions = [positions] if positions.is_a? Integer
272
+ positions.each do |pos|
273
+ raise IndexError, "#{pos} is not a valid position." if pos >= size
274
+ end
275
+ end
276
+
277
+ # Preprocess ranges, integers and array in appropriate ways
278
+ def preprocess_positions *positions
279
+ if positions.size == 1
280
+ case positions.first
281
+ when Integer
282
+ positions.first
283
+ when Range
284
+ size.times.to_a[positions.first]
285
+ else
286
+ raise ArgumentError, 'Unkown position type.'
287
+ end
288
+ else
289
+ positions
290
+ end
291
+ end
160
292
  end # class Index
161
293
 
162
294
  class MultiIndex < Index
163
- include Enumerable
164
-
165
295
  def each(&block)
166
296
  to_a.each(&block)
167
297
  end
@@ -179,7 +309,6 @@ module Daru
179
309
  def initialize opts={}
180
310
  labels = opts[:labels]
181
311
  levels = opts[:levels]
182
-
183
312
  raise ArgumentError,
184
313
  'Must specify both labels and levels' unless labels && levels
185
314
  raise ArgumentError,
@@ -188,31 +317,23 @@ module Daru
188
317
  'Incorrect labels and levels' if incorrect_fields?(labels, levels)
189
318
 
190
319
  @labels = labels
191
- @levels = levels.map { |e| Hash[e.map.with_index.to_a] }
320
+ @levels = levels.map { |e| e.map.with_index.to_h }
192
321
  end
193
322
 
194
323
  def incorrect_fields?(_labels, levels)
195
- levels[0].size # FIXME: without this call everything fails
196
-
197
- correct = levels.all? { |e| e.uniq.size == e.size }
324
+ levels[0].size # FIXME: without this exact call some specs are failing
198
325
 
199
- !correct
326
+ levels.any? { |e| e.uniq.size != e.size }
200
327
  end
201
328
 
202
329
  private :incorrect_fields?
203
330
 
204
331
  def self.from_arrays arrays
205
332
  levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
206
- labels = []
207
333
 
208
- arrays.each_with_index do |arry, level_index|
209
- label = []
334
+ labels = arrays.each_with_index.map do |arry, level_index|
210
335
  level = levels[level_index]
211
- arry.each do |lvl|
212
- label << level.index(lvl)
213
- end
214
-
215
- labels << label
336
+ arry.map { |lvl| level.index(lvl) }
216
337
  end
217
338
 
218
339
  MultiIndex.new labels: labels, levels: levels
@@ -222,11 +343,21 @@ module Daru
222
343
  from_arrays tuples.transpose
223
344
  end
224
345
 
346
+ def self.try_from_tuples tuples
347
+ if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
348
+ from_tuples(tuples)
349
+ else
350
+ nil
351
+ end
352
+ end
353
+
225
354
  def [] *key
226
355
  key.flatten!
227
356
  case
228
- when key[0].is_a?(Range) then retrieve_from_range(key[0])
229
- when (key[0].is_a?(Integer) and key.size == 1) then try_retrieve_from_integer(key[0])
357
+ when key[0].is_a?(Range)
358
+ retrieve_from_range(key[0])
359
+ when key[0].is_a?(Integer) && key.size == 1
360
+ try_retrieve_from_integer(key[0])
230
361
  else
231
362
  begin
232
363
  retrieve_from_tuples key
@@ -236,9 +367,71 @@ module Daru
236
367
  end
237
368
  end
238
369
 
370
+ def valid? *indexes
371
+ # FIXME: This is perhaps not a good method
372
+ pos(*indexes)
373
+ return true
374
+ rescue IndexError
375
+ return false
376
+ end
377
+
378
+ # Returns positions given indexes or positions
379
+ # @note If the argument is both a valid index and a valid position,
380
+ # it will be treated as a valid index
381
+ # @param [Array<object>] *indexes indexes or positions
382
+ # @example
383
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
384
+ # idx.pos :a
385
+ # # => [0, 1]
386
+ def pos *indexes
387
+ if indexes.first.is_a? Integer
388
+ return indexes.first if indexes.size == 1
389
+ return indexes
390
+ end
391
+ res = self[indexes]
392
+ return res if res.is_a? Integer
393
+ res.map { |i| self[i] }
394
+ end
395
+
396
+ def subset *indexes
397
+ if indexes.first.is_a? Integer
398
+ MultiIndex.from_tuples(indexes.map { |index| key(index) })
399
+ else
400
+ self[indexes].conform indexes
401
+ end
402
+ end
403
+
404
+ # Takes positional values and returns subset of the self
405
+ # capturing the indexes at mentioned positions
406
+ # @param [Array<Integer>] positional values
407
+ # @return [object] index object
408
+ # @example
409
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
410
+ # idx.at 0, 1
411
+ # # => #<Daru::MultiIndex(2x2)>
412
+ # # a one
413
+ # # two
414
+ def at *positions
415
+ positions = preprocess_positions(*positions)
416
+ validate_positions(*positions)
417
+ if positions.is_a? Integer
418
+ key(positions)
419
+ else
420
+ Daru::MultiIndex.from_tuples positions.map(&method(:key))
421
+ end
422
+ end
423
+
424
+ def add *indexes
425
+ Daru::MultiIndex.from_tuples to_a << indexes
426
+ end
427
+
428
+ def reorder(new_order)
429
+ from = to_a
430
+ self.class.from_tuples(new_order.map { |i| from[i] })
431
+ end
432
+
239
433
  def try_retrieve_from_integer int
240
- return retrieve_from_tuples([int]) if @levels[0].key?(int)
241
- int
434
+ @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
242
435
  end
243
436
 
244
437
  def retrieve_from_range range
@@ -279,17 +472,9 @@ module Daru
279
472
  raise ArgumentError,
280
473
  "Key #{index} is too large" if index >= @labels[0].size
281
474
 
282
- level_indexes =
283
- @labels.each_with_object([]) do |label, memo|
284
- memo << label[index]
285
- end
286
-
287
- tuple = []
288
- level_indexes.each_with_index do |level_index, i|
289
- tuple << @levels[i].keys[level_index]
290
- end
291
-
292
- tuple
475
+ @labels
476
+ .each_with_index
477
+ .map { |label, i| @levels[i].keys[label[index]] }
293
478
  end
294
479
 
295
480
  def dup
@@ -309,15 +494,13 @@ module Daru
309
494
  end
310
495
 
311
496
  def empty?
312
- @labels.flatten.empty? and @levels.all?(&:empty?)
497
+ @labels.flatten.empty? && @levels.all?(&:empty?)
313
498
  end
314
499
 
315
500
  def include? tuple
316
- tuple.flatten!
317
- tuple.each_with_index do |tup, i|
318
- return false unless @levels[i][tup]
319
- end
320
- true
501
+ return false unless tuple.is_a? Enumerable
502
+ tuple.flatten.each_with_index
503
+ .all? { |tup, i| @levels[i][tup] }
321
504
  end
322
505
 
323
506
  def size
@@ -342,8 +525,14 @@ module Daru
342
525
  Array.new(size) { |i| i }
343
526
  end
344
527
 
345
- def inspect
346
- "Daru::MultiIndex:#{object_id} (levels: #{levels}\nlabels: #{labels})"
528
+ def inspect threshold=20
529
+ "#<Daru::MultiIndex(#{size}x#{width})>\n" +
530
+ Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
531
+ end
532
+
533
+ def to_html
534
+ path = File.expand_path('../iruby/templates/multi_index.html.erb', __FILE__)
535
+ ERB.new(File.read(path).strip).result(binding)
347
536
  end
348
537
 
349
538
  # Provide a MultiIndex for sub vector produced
@@ -354,5 +543,219 @@ module Daru
354
543
  return self if input_indexes[0].is_a? Range
355
544
  drop_left_level input_indexes.size
356
545
  end
546
+
547
+ # Return tuples with nils in place of repeating values, like this:
548
+ #
549
+ # [:a , :bar, :one]
550
+ # [nil, nil , :two]
551
+ # [nil, :foo, :one]
552
+ #
553
+ def sparse_tuples
554
+ tuples = to_a
555
+ [tuples.first] + each_cons(2).map { |prev, cur|
556
+ left = cur.zip(prev).drop_while { |c, p| c == p }
557
+ [nil] * (cur.size - left.size) + left.map(&:first)
558
+ }
559
+ end
560
+ end
561
+
562
+ class CategoricalIndex < Index
563
+ # Create a categorical index object.
564
+ # @param indexes [Array<object>] array of indexes
565
+ # @return [Daru::CategoricalIndex] categorical index
566
+ # @example
567
+ # Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
568
+ # # => #<Daru::CategoricalIndex(5): {a, 1, a, 1, c}>
569
+ def initialize indexes
570
+ # Create a hash to map each category to positional indexes
571
+ categories = indexes.each_with_index.group_by(&:first)
572
+ @cat_hash = categories.map { |cat, group| [cat, group.map(&:last)] }.to_h
573
+
574
+ # Map each category to a unique integer for effective storage in @array
575
+ map_cat_int = categories.keys.each_with_index.to_h
576
+
577
+ # To link every instance to its category,
578
+ # it stores integer for every instance representing its category
579
+ @array = map_cat_int.values_at(*indexes)
580
+ end
581
+
582
+ # Duplicates the index object and returns it
583
+ # @return [Daru::CategoricalIndex] duplicated index object
584
+ def dup
585
+ # Improve it by initializing index by hash
586
+ Daru::CategoricalIndex.new to_a
587
+ end
588
+
589
+ # Returns true if index or category is valid
590
+ # @param index [object] the index value to look for
591
+ # @return [true, false] true if index is included, false otherwise
592
+ def include? index
593
+ @cat_hash.include? index
594
+ end
595
+
596
+ # Returns array of categories
597
+ # @example
598
+ # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
599
+ # x.categories
600
+ # # => [:a, 1, :c]
601
+ def categories
602
+ @cat_hash.keys
603
+ end
604
+
605
+ # Returns positions given categories or positions
606
+ # @note If the argument is not a valid category, it is treated as a position
607
+ # value and returned as it is.
608
+ # @param [Array<object>] *indexes categories or positions
609
+ # @example
610
+ # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
611
+ # x.pos :a, 1
612
+ # # => [0, 1, 2, 3]
613
+ def pos *indexes
614
+ positions = indexes.map do |index|
615
+ if include? index
616
+ @cat_hash[index]
617
+ elsif index.is_a?(Numeric) && index < @array.size
618
+ index
619
+ else
620
+ raise IndexError, "#{index.inspect} is neither a valid category"\
621
+ ' nor a valid position'
622
+ end
623
+ end
624
+
625
+ positions.flatten!
626
+ positions.size == 1 ? positions.first : positions.sort
627
+ end
628
+
629
+ # Returns index value from position
630
+ # @param pos [Integer] the position to look for
631
+ # @return [object] category corresponding to position
632
+ # @example
633
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
634
+ # idx.index_from_pos 1
635
+ # # => :b
636
+ def index_from_pos pos
637
+ cat_from_int @array[pos]
638
+ end
639
+
640
+ # Returns enumerator enumerating all index values in the order they occur
641
+ # @return [Enumerator] all index values
642
+ # @example
643
+ # idx = Daru::CategoricalIndex.new [:a, :a, :b]
644
+ # idx.each.to_a
645
+ # # => [:a, :a, :b]
646
+ def each
647
+ return enum_for(:each) unless block_given?
648
+ @array.each { |pos| yield cat_from_int pos }
649
+ self
650
+ end
651
+
652
+ # Compares two index objects. Returns true if every instance of each category
653
+ # occurs at the same position
654
+ # @param [Daru::CategoricalIndex] other index object to be checked against
655
+ # @return [true, false] true if other is similar to self
656
+ # @example
657
+ # a = Daru::CategoricalIndex.new [:a, :a, :b]
658
+ # b = Daru::CategoricalIndex.new [:b, :a, :a]
659
+ # a == b
660
+ # # => false
661
+ def == other
662
+ self.class == other.class &&
663
+ size == other.size &&
664
+ to_h == other.to_h
665
+ end
666
+
667
+ # Returns all the index values
668
+ # @return [Array] all index values
669
+ # @example
670
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
671
+ # idx.to_a
672
+ def to_a
673
+ each.to_a
674
+ end
675
+
676
+ # Returns hash table mapping category to positions at which they occur
677
+ # @return [Hash] hash table mapping category to array of positions
678
+ # @example
679
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
680
+ # idx.to_h
681
+ # # => {:a=>[0, 2], :b=>[1]}
682
+ def to_h
683
+ @cat_hash
684
+ end
685
+
686
+ # Returns size of the index object
687
+ # @return [Integer] total number of instances of all categories
688
+ # @example
689
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
690
+ # idx.size
691
+ # # => 3
692
+ def size
693
+ @array.size
694
+ end
695
+
696
+ # Returns true if index object is storing no category
697
+ # @return [true, false] true if index object is empty
698
+ # @example
699
+ # i = Daru::CategoricalIndex.new []
700
+ # # => #<Daru::CategoricalIndex(0): {}>
701
+ # i.empty?
702
+ # # => true
703
+ def empty?
704
+ @array.empty?
705
+ end
706
+
707
+ # Return subset given categories or positions
708
+ # @param [Array<object>] *indexes categories or positions
709
+ # @return [Daru::CategoricalIndex] subset of the self containing the
710
+ # mentioned categories or positions
711
+ # @example
712
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
713
+ # idx.subset :a, :b
714
+ # # => #<Daru::CategoricalIndex(4): {a, b, a, b}>
715
+ def subset *indexes
716
+ positions = pos(*indexes)
717
+ new_index = positions.map { |pos| index_from_pos pos }
718
+
719
+ Daru::CategoricalIndex.new new_index.flatten
720
+ end
721
+
722
+ # Takes positional values and returns subset of the self
723
+ # capturing the categories at mentioned positions
724
+ # @param [Array<Integer>] positional values
725
+ # @return [object] index object
726
+ # @example
727
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
728
+ # idx.at 0, 1
729
+ # # => #<Daru::CategoricalIndex(2): {a, b}>
730
+ def at *positions
731
+ positions = preprocess_positions(*positions)
732
+ validate_positions(*positions)
733
+ if positions.is_a? Integer
734
+ index_from_pos(positions)
735
+ else
736
+ Daru::CategoricalIndex.new positions.map(&method(:index_from_pos))
737
+ end
738
+ end
739
+
740
+ # Add specified index values to the index object
741
+ # @param [Array<object>] *indexes index values to add
742
+ # @return [Daru::CategoricalIndex] index object with added values
743
+ # @example
744
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
745
+ # idx.add :d
746
+ # # => #<Daru::CategoricalIndex(6): {a, b, a, b, c, d}>
747
+ def add *indexes
748
+ Daru::CategoricalIndex.new(to_a + indexes)
749
+ end
750
+
751
+ private
752
+
753
+ def int_from_cat cat
754
+ @cat_hash.keys.index cat
755
+ end
756
+
757
+ def cat_from_int cat
758
+ @cat_hash.keys[cat]
759
+ end
357
760
  end
358
761
  end