daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -37,24 +37,14 @@ module Daru
37
37
  # #=> #<DateTime: 2011-05-03T03:15:00+00:00 ((2455685j,11700s,0n),+0s,2299161j)>
38
38
  def initialize opts={}
39
39
  n = opts[:n] || 1
40
-
41
- @offset =
42
- case
43
- when opts[:secs]
44
- Offsets::Second.new(n*opts[:secs])
45
- when opts[:mins]
46
- Offsets::Minute.new(n*opts[:mins])
47
- when opts[:hours]
48
- Offsets::Hour.new(n*opts[:hours])
49
- when opts[:days]
50
- Offsets::Day.new(n*opts[:days])
51
- when opts[:weeks]
52
- Offsets::Day.new(7*n*opts[:weeks])
53
- when opts[:months]
54
- Offsets::Month.new(n*opts[:months])
55
- when opts[:years]
56
- Offsets::Year.new(n*opts[:years])
40
+ Offsets::LIST.each do |key, klass|
41
+ if opts.key?(key)
42
+ @offset = klass.new(n * opts[key])
43
+ break
57
44
  end
45
+ end
46
+
47
+ @offset = Offsets::Day.new(7*n*opts[:weeks]) if opts[:weeks]
58
48
  end
59
49
 
60
50
  # Offset a DateTime forward.
@@ -70,20 +60,46 @@ module Daru
70
60
  def - date_time
71
61
  @offset - date_time
72
62
  end
63
+
64
+ def -@
65
+ NegativeDateOffset.new(self)
66
+ end
67
+ end
68
+
69
+ class NegativeDateOffset
70
+ def initialize(offset)
71
+ @offset = offset
72
+ end
73
+
74
+ def + date_time
75
+ @offset - date_time
76
+ end
77
+
78
+ def - date_time
79
+ @offset + date_time
80
+ end
73
81
  end
74
82
 
75
83
  module Offsets
84
+ class DateOffsetType < DateOffset
85
+ def initialize n=1
86
+ @n = n
87
+ end
88
+
89
+ def freq_string
90
+ (@n == 1 ? '' : @n.to_s) + self.class::FREQ
91
+ end
92
+ end
93
+
76
94
  # Private superclass for Offsets with equal inter-frequencies.
77
95
  # @abstract
78
96
  # @private
79
- class Tick < DateOffset
97
+ class Tick < DateOffsetType
98
+ # @method initialize
80
99
  # Initialize one of the subclasses of Tick with the number of the times
81
100
  # the offset should be applied, which is the supplied as the argument.
82
101
  #
83
102
  # @param n [Integer] The number of times an offset should be applied.
84
- def initialize n=1
85
- @n = n
86
- end
87
103
 
88
104
  def + date_time
89
105
  date_time + @n*multiplier
@@ -102,13 +118,11 @@ module Daru
102
118
  # offset + DateTime.new(2012,5,1,4,3)
103
119
  # #=> #<DateTime: 2012-05-01T04:03:05+00:00 ((2456049j,14585s,0n),+0s,2299161j)>
104
120
  class Second < Tick
121
+ FREQ = 'S'.freeze
122
+
105
123
  def multiplier
106
124
  1.1574074074074073e-05
107
125
  end
108
-
109
- def freq_string
110
- (@n == 1 ? '' : @n.to_s) + 'S'
111
- end
112
126
  end
113
127
 
114
128
  # Create a minutes offset
@@ -119,13 +133,11 @@ module Daru
119
133
  # offset + DateTime.new(2012,5,1,4,3)
120
134
  # #=> #<DateTime: 2012-05-01T04:11:00+00:00 ((2456049j,15060s,0n),+0s,2299161j)>
121
135
  class Minute < Tick
136
+ FREQ = 'M'.freeze
137
+
122
138
  def multiplier
123
139
  0.0006944444444444445
124
140
  end
125
-
126
- def freq_string
127
- (@n == 1 ? '' : @n.to_s) + 'M'
128
- end
129
141
  end
130
142
 
131
143
  # Create an hours offset
@@ -136,13 +148,11 @@ module Daru
136
148
  # offset + DateTime.new(2012,5,1,4,3)
137
149
  # #=> #<DateTime: 2012-05-01T12:03:00+00:00 ((2456049j,43380s,0n),+0s,2299161j)>
138
150
  class Hour < Tick
151
+ FREQ = 'H'.freeze
152
+
139
153
  def multiplier
140
154
  0.041666666666666664
141
155
  end
142
-
143
- def freq_string
144
- (@n == 1 ? '' : @n.to_s) + 'H'
145
- end
146
156
  end
147
157
 
148
158
  # Create an days offset
@@ -153,13 +163,11 @@ module Daru
153
163
  # offset + DateTime.new(2012,5,1,4,3)
154
164
  # #=> #<DateTime: 2012-05-03T04:03:00+00:00 ((2456051j,14580s,0n),+0s,2299161j)>
155
165
  class Day < Tick
166
+ FREQ = 'D'.freeze
167
+
156
168
  def multiplier
157
169
  1.0
158
170
  end
159
-
160
- def freq_string
161
- (@n == 1 ? '' : @n.to_s) + 'D'
162
- end
163
171
  end
164
172
 
165
173
  # Create an months offset
@@ -170,9 +178,7 @@ module Daru
170
178
  # offset + DateTime.new(2012,5,1,4,3)
171
179
  # #=> #<DateTime: 2012-10-01T04:03:00+00:00 ((2456202j,14580s,0n),+0s,2299161j)>
172
180
  class Month < Tick
173
- def freq_string
174
- (@n == 1 ? '' : @n.to_s) + 'MONTH'
175
- end
181
+ FREQ = 'MONTH'.freeze
176
182
 
177
183
  def + date_time
178
184
  date_time >> @n
@@ -191,9 +197,7 @@ module Daru
191
197
  # offset + DateTime.new(2012,5,1,4,3)
192
198
  # #=> #<DateTime: 2014-05-01T04:03:00+00:00 ((2456779j,14580s,0n),+0s,2299161j)>
193
199
  class Year < Tick
194
- def freq_string
195
- (@n == 1 ? '' : @n.to_s) + 'YEAR'
196
- end
200
+ FREQ = 'YEAR'.freeze
197
201
 
198
202
  def + date_time
199
203
  date_time >> @n*12
@@ -247,14 +251,8 @@ module Daru
247
251
  # offset = Daru::Offsets::MonthBegin.new(2)
248
252
  # offset + DateTime.new(2012,5,5)
249
253
  # #=> #<DateTime: 2012-07-01T00:00:00+00:00 ((2456110j,0s,0n),+0s,2299161j)>
250
- class MonthBegin < DateOffset
251
- def initialize n=1
252
- @n = n
253
- end
254
-
255
- def freq_string
256
- (@n == 1 ? '' : @n.to_s) + 'MB'
257
- end
254
+ class MonthBegin < DateOffsetType
255
+ FREQ = 'MB'.freeze
258
256
 
259
257
  def + date_time
260
258
  @n.times do
@@ -288,14 +286,8 @@ module Daru
288
286
  # offset = Daru::Offsets::MonthEnd.new
289
287
  # offset + DateTime.new(2012,5,5)
290
288
  # #=> #<DateTime: 2012-05-31T00:00:00+00:00 ((2456079j,0s,0n),+0s,2299161j)>
291
- class MonthEnd < DateOffset
292
- def initialize n=1
293
- @n = n
294
- end
295
-
296
- def freq_string
297
- (@n == 1 ? '' : @n.to_s) + 'ME'
298
- end
289
+ class MonthEnd < DateOffsetType
290
+ FREQ = 'ME'.freeze
299
291
 
300
292
  def + date_time
301
293
  @n.times do
@@ -333,14 +325,8 @@ module Daru
333
325
  # offset = Daru::Offsets::YearBegin.new(3)
334
326
  # offset + DateTime.new(2012,5,5)
335
327
  # #=> #<DateTime: 2015-01-01T00:00:00+00:00 ((2457024j,0s,0n),+0s,2299161j)>
336
- class YearBegin < DateOffset
337
- def initialize n=1
338
- @n = n
339
- end
340
-
341
- def freq_string
342
- (@n == 1 ? '' : @n.to_s) + 'YB'
343
- end
328
+ class YearBegin < DateOffsetType
329
+ FREQ = 'YB'.freeze
344
330
 
345
331
  def + date_time
346
332
  DateTime.new(date_time.year + @n, 1, 1,
@@ -357,7 +343,7 @@ module Daru
357
343
  end
358
344
 
359
345
  def on_offset? date_time
360
- date_time.month == 1 and date_time.day == 1
346
+ date_time.month == 1 && date_time.day == 1
361
347
  end
362
348
  end
363
349
 
@@ -368,14 +354,8 @@ module Daru
368
354
  # offset = Daru::Offsets::YearEnd.new
369
355
  # offset + DateTime.new(2012,5,5)
370
356
  # #=> #<DateTime: 2012-12-31T00:00:00+00:00 ((2456293j,0s,0n),+0s,2299161j)>
371
- class YearEnd < DateOffset
372
- def initialize n=1
373
- @n = n
374
- end
375
-
376
- def freq_string
377
- (@n == 1 ? '' : @n.to_s) + 'YE'
378
- end
357
+ class YearEnd < DateOffsetType
358
+ FREQ = 'YE'.freeze
379
359
 
380
360
  def + date_time
381
361
  if on_offset?(date_time)
@@ -392,9 +372,18 @@ module Daru
392
372
  end
393
373
 
394
374
  def on_offset? date_time
395
- date_time.month == 12 and date_time.day == 31
375
+ date_time.month == 12 && date_time.day == 31
396
376
  end
397
377
  end
378
+
379
+ LIST = {
380
+ secs: Second,
381
+ mins: Minute,
382
+ hours: Hour,
383
+ days: Day,
384
+ months: Month,
385
+ years: Year
386
+ }.freeze
398
387
  end
399
388
 
400
389
  # rubocop:enable Style/OpMethod
@@ -0,0 +1,54 @@
1
+ module Daru
2
+ module Formatters
3
+ class Table
4
+ def self.format data, options={}
5
+ new(data, options[:headers], options[:row_headers])
6
+ .format(options[:threshold], options[:spacing])
7
+ end
8
+
9
+ def initialize(data, headers, row_headers)
10
+ @data = data || []
11
+ @headers = (headers || []).to_a
12
+ @row_headers = (row_headers || []).to_a
13
+ end
14
+
15
+ DEFAULT_SPACING = 10
16
+ DEFAULT_THRESHOLD = 15
17
+
18
+ def format threshold=nil, spacing=nil
19
+ rows = build_rows(threshold || DEFAULT_THRESHOLD)
20
+
21
+ formatter = construct_formatter rows, spacing || DEFAULT_SPACING
22
+
23
+ rows.map { |r| formatter % r }.join("\n")
24
+ end
25
+
26
+ private
27
+
28
+ def build_rows threshold # rubocop:disable Metrics/AbcSize
29
+ @row_headers.first(threshold).zip(@data).map do |(r, datarow)|
30
+ [*[r].flatten.map(&:to_s), *(datarow || []).map(&method(:pretty_to_s))]
31
+ end.tap do |rows|
32
+ unless @headers.empty?
33
+ spaces_to_add = rows.empty? ? 0 : rows.first.size - @headers.size
34
+ rows.unshift [''] * spaces_to_add + @headers.map(&:to_s)
35
+ end
36
+
37
+ rows << ['...'] * rows.first.count if @row_headers.count > threshold
38
+ end
39
+ end
40
+
41
+ def construct_formatter rows, spacing
42
+ width = rows.flatten.map(&:size).max
43
+ width = [3, width].max # not less than 'nil'
44
+ width = [width, spacing].min # not more than max width
45
+
46
+ " %#{width}.#{width}s" * rows.first.size
47
+ end
48
+
49
+ def pretty_to_s(val)
50
+ val.nil? ? 'nil' : val.to_s
51
+ end
52
+ end
53
+ end
54
+ end
@@ -0,0 +1,40 @@
1
+ module Daru
2
+ module ArrayHelper
3
+ module_function
4
+
5
+ # Recode repeated values on an array, adding the number of repetition
6
+ # at the end
7
+ # Example:
8
+ # a=%w{a b c c d d d e}
9
+ # a.recode_repeated
10
+ # => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
11
+ def recode_repeated(array)
12
+ return array if array.size == array.uniq.size
13
+
14
+ # create hash of { <name> => 0}
15
+ # for all names which are more than one time in array
16
+ counter = array
17
+ .group_by(&:itself)
18
+ .select { |_, g| g.size > 1 }
19
+ .map(&:first)
20
+ .collect { |n| [n, 0] }.to_h
21
+
22
+ # ...and use this hash for actual recode
23
+ array.collect do |n|
24
+ if counter.key?(n)
25
+ counter[n] += 1
26
+ new_n = '%s_%d' % [n, counter[n]]
27
+ n.is_a?(Symbol) ? new_n.to_sym : new_n
28
+ else
29
+ n
30
+ end
31
+ end
32
+ end
33
+
34
+ def array_of?(array, match)
35
+ array.is_a?(Array) &&
36
+ !array.empty? &&
37
+ array.all? { |el| match === el } # rubocop:disable Style/CaseEquality
38
+ end
39
+ end
40
+ end
@@ -21,19 +21,24 @@ module Daru
21
21
  # We over-ride the .new method so that any sort of Index can be generated
22
22
  # from Daru::Index based on the types of arguments supplied.
23
23
  def self.new *args, &block
24
- source = args[0]
25
-
26
- if source.respond_to?(:first) && source.first.is_a?(Array)
27
- Daru::MultiIndex.from_tuples source
28
- elsif source && source.is_a?(Array) && !source.empty? &&
29
- source.all? { |e| e.is_a?(DateTime) }
30
- Daru::DateTimeIndex.new(source, freq: :infer)
31
- else
24
+ # FIXME: I'm not sure this clever trick really deserves our attention.
25
+ # Most of common ruby libraries just avoid it in favor of usual
26
+ # factory method, like `Index.create`. When `Index.new(...).class != Index`
27
+ # it just leads to confusion and surprises. - zverok, 2016-05-18
28
+ source = args.first
29
+
30
+ MultiIndex.try_from_tuples(source) ||
31
+ DateTimeIndex.try_create(source) ||
32
32
  allocate.tap { |i| i.send :initialize, *args, &block }
33
- end
33
+ end
34
+
35
+ def self.coerce maybe_index
36
+ maybe_index.is_a?(Index) ? maybe_index : Daru::Index.new(maybe_index)
34
37
  end
35
38
 
36
39
  def each(&block)
40
+ return to_enum(:each) unless block_given?
41
+
37
42
  @relation_hash.each_key(&block)
38
43
  self
39
44
  end
@@ -70,36 +75,88 @@ module Daru
70
75
  @relation_hash.values == other.relation_hash.values
71
76
  end
72
77
 
73
- def [](*key)
74
- loc = key[0]
75
-
78
+ def [](key, *rest)
76
79
  case
77
- when loc.is_a?(Range)
78
- first = loc.first
79
- last = loc.last
80
-
81
- slice first, last
82
- when key.size > 1
83
- if include? key[0]
84
- Daru::Index.new key.map { |k| k }
85
- else
86
- # Assume the user is specifing values for index not keys
87
- # Return index object having keys corresponding to values provided
88
- Daru::Index.new key.map { |k| key k }
89
- end
80
+ when key.is_a?(Range)
81
+ by_range key
82
+ when !rest.empty?
83
+ by_multi_key key, *rest
90
84
  else
91
- v = @relation_hash[loc]
92
- unless v
93
- return loc if loc.is_a?(Numeric) && loc < size
94
- raise IndexError, "Specified index #{loc.inspect} does not exist"
95
- end
96
- v
85
+ by_single_key key
86
+ end
87
+ end
88
+
89
+ # Returns true if all arguments are either a valid category or position
90
+ # @param [Array<object>] *indexes categories or positions
91
+ # @return [true, false]
92
+ # @example
93
+ # idx.valid? :a, 2
94
+ # # => true
95
+ # idx.valid? 3
96
+ # # => false
97
+ def valid? *indexes
98
+ indexes.all? { |i| to_a.include?(i) || (i.is_a?(Numeric) && i < size) }
99
+ end
100
+
101
+ # Returns positions given indexes or positions
102
+ # @note If the argument is both a valid index and a valid position,
103
+ # it will be treated as a valid index
104
+ # @param [Array<object>] *indexes indexes or positions
105
+ # @example
106
+ # x = Daru::Index.new [:a, :b, :c]
107
+ # x.pos :a, 1
108
+ # # => [0, 1]
109
+ def pos *indexes
110
+ indexes = preprocess_range(indexes.first) if indexes.first.is_a? Range
111
+
112
+ if indexes.size == 1
113
+ self[indexes.first]
114
+ else
115
+ indexes.map { |index| by_single_key index }
116
+ end
117
+ end
118
+
119
+ def subset *indexes
120
+ if indexes.first.is_a? Range
121
+ slice indexes.first.begin, indexes.first.end
122
+ elsif include? indexes.first
123
+ # Assume 'indexes' contain indexes not positions
124
+ Daru::Index.new indexes
125
+ else
126
+ # Assume 'indexes' contain positions not indexes
127
+ Daru::Index.new indexes.map { |k| key k }
128
+ end
129
+ end
130
+
131
+ # Takes positional values and returns subset of the self
132
+ # capturing the indexes at mentioned positions
133
+ # @param [Array<Integer>] positional values
134
+ # @return [object] index object
135
+ # @example
136
+ # idx = Daru::Index.new [:a, :b, :c]
137
+ # idx.at 0, 1
138
+ # # => #<Daru::Index(2): {a, b}>
139
+ def at *positions
140
+ positions = preprocess_positions(*positions)
141
+ validate_positions(*positions)
142
+ if positions.is_a? Integer
143
+ key(positions)
144
+ else
145
+ self.class.new positions.map(&method(:key))
146
+ end
147
+ end
148
+
149
+ def inspect threshold=20
150
+ if size <= threshold
151
+ "#<#{self.class}(#{size}): {#{to_a.join(', ')}}>"
152
+ else
153
+ "#<#{self.class}(#{size}): {#{to_a.first(threshold).join(', ')} ... #{to_a.last}}>"
97
154
  end
98
155
  end
99
156
 
100
157
  def slice *args
101
- start = args[0]
102
- en = args[1]
158
+ start = args[0]
159
+ en = args[1]
103
160
 
104
161
  if start.is_a?(Integer) && en.is_a?(Integer)
105
162
  Index.new @keys[start..en]
@@ -118,6 +175,7 @@ module Daru
118
175
 
119
176
  # Produce a new index from the set intersection of two indexes
120
177
  def & other
178
+ Index.new(to_a & other.to_a)
121
179
  end
122
180
 
123
181
  def to_a
@@ -125,7 +183,8 @@ module Daru
125
183
  end
126
184
 
127
185
  def key(value)
128
- @relation_hash.keys[value]
186
+ return nil unless value.is_a?(Numeric)
187
+ @keys[value]
129
188
  end
130
189
 
131
190
  def include? index
@@ -140,6 +199,10 @@ module Daru
140
199
  Daru::Index.new @relation_hash.keys
141
200
  end
142
201
 
202
+ def add *indexes
203
+ Daru::Index.new(to_a + indexes)
204
+ end
205
+
143
206
  def _dump(*)
144
207
  Marshal.dump(relation_hash: @relation_hash)
145
208
  end
@@ -157,11 +220,78 @@ module Daru
157
220
  def conform(*)
158
221
  self
159
222
  end
223
+
224
+ def reorder(new_order)
225
+ from = to_a
226
+ self.class.new(new_order.map { |i| from[i] })
227
+ end
228
+
229
+ private
230
+
231
+ def preprocess_range rng
232
+ start = rng.begin
233
+ en = rng.end
234
+
235
+ if start.is_a?(Integer) && en.is_a?(Integer)
236
+ @keys[start..en]
237
+ else
238
+ start_idx = @relation_hash[start]
239
+ en_idx = @relation_hash[en]
240
+
241
+ @keys[start_idx..en_idx]
242
+ end
243
+ end
244
+
245
+ def by_range rng
246
+ slice rng.begin, rng.end
247
+ end
248
+
249
+ def by_multi_key *key
250
+ if include? key[0]
251
+ Daru::Index.new key.map { |k| k }
252
+ else
253
+ # Assume the user is specifying values for index not keys
254
+ # Return index object having keys corresponding to values provided
255
+ Daru::Index.new key.map { |k| key k }
256
+ end
257
+ end
258
+
259
+ def by_single_key key
260
+ if @relation_hash.key?(key)
261
+ @relation_hash[key]
262
+ elsif key.is_a?(Numeric) && key < size
263
+ key
264
+ else
265
+ raise IndexError, "Specified index #{key.inspect} does not exist"
266
+ end
267
+ end
268
+
269
+ # Raises IndexError when one of the positions is an invalid position
270
+ def validate_positions *positions
271
+ positions = [positions] if positions.is_a? Integer
272
+ positions.each do |pos|
273
+ raise IndexError, "#{pos} is not a valid position." if pos >= size
274
+ end
275
+ end
276
+
277
+ # Preprocess ranges, integers and array in appropriate ways
278
+ def preprocess_positions *positions
279
+ if positions.size == 1
280
+ case positions.first
281
+ when Integer
282
+ positions.first
283
+ when Range
284
+ size.times.to_a[positions.first]
285
+ else
286
+ raise ArgumentError, 'Unkown position type.'
287
+ end
288
+ else
289
+ positions
290
+ end
291
+ end
160
292
  end # class Index
161
293
 
162
294
  class MultiIndex < Index
163
- include Enumerable
164
-
165
295
  def each(&block)
166
296
  to_a.each(&block)
167
297
  end
@@ -179,7 +309,6 @@ module Daru
179
309
  def initialize opts={}
180
310
  labels = opts[:labels]
181
311
  levels = opts[:levels]
182
-
183
312
  raise ArgumentError,
184
313
  'Must specify both labels and levels' unless labels && levels
185
314
  raise ArgumentError,
@@ -188,31 +317,23 @@ module Daru
188
317
  'Incorrect labels and levels' if incorrect_fields?(labels, levels)
189
318
 
190
319
  @labels = labels
191
- @levels = levels.map { |e| Hash[e.map.with_index.to_a] }
320
+ @levels = levels.map { |e| e.map.with_index.to_h }
192
321
  end
193
322
 
194
323
  def incorrect_fields?(_labels, levels)
195
- levels[0].size # FIXME: without this call everything fails
196
-
197
- correct = levels.all? { |e| e.uniq.size == e.size }
324
+ levels[0].size # FIXME: without this exact call some specs are failing
198
325
 
199
- !correct
326
+ levels.any? { |e| e.uniq.size != e.size }
200
327
  end
201
328
 
202
329
  private :incorrect_fields?
203
330
 
204
331
  def self.from_arrays arrays
205
332
  levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
206
- labels = []
207
333
 
208
- arrays.each_with_index do |arry, level_index|
209
- label = []
334
+ labels = arrays.each_with_index.map do |arry, level_index|
210
335
  level = levels[level_index]
211
- arry.each do |lvl|
212
- label << level.index(lvl)
213
- end
214
-
215
- labels << label
336
+ arry.map { |lvl| level.index(lvl) }
216
337
  end
217
338
 
218
339
  MultiIndex.new labels: labels, levels: levels
@@ -222,11 +343,21 @@ module Daru
222
343
  from_arrays tuples.transpose
223
344
  end
224
345
 
346
+ def self.try_from_tuples tuples
347
+ if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
348
+ from_tuples(tuples)
349
+ else
350
+ nil
351
+ end
352
+ end
353
+
225
354
  def [] *key
226
355
  key.flatten!
227
356
  case
228
- when key[0].is_a?(Range) then retrieve_from_range(key[0])
229
- when (key[0].is_a?(Integer) and key.size == 1) then try_retrieve_from_integer(key[0])
357
+ when key[0].is_a?(Range)
358
+ retrieve_from_range(key[0])
359
+ when key[0].is_a?(Integer) && key.size == 1
360
+ try_retrieve_from_integer(key[0])
230
361
  else
231
362
  begin
232
363
  retrieve_from_tuples key
@@ -236,9 +367,71 @@ module Daru
236
367
  end
237
368
  end
238
369
 
370
+ def valid? *indexes
371
+ # FIXME: This is perhaps not a good method
372
+ pos(*indexes)
373
+ return true
374
+ rescue IndexError
375
+ return false
376
+ end
377
+
378
+ # Returns positions given indexes or positions
379
+ # @note If the argument is both a valid index and a valid position,
380
+ # it will be treated as a valid index
381
+ # @param [Array<object>] *indexes indexes or positions
382
+ # @example
383
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
384
+ # idx.pos :a
385
+ # # => [0, 1]
386
+ def pos *indexes
387
+ if indexes.first.is_a? Integer
388
+ return indexes.first if indexes.size == 1
389
+ return indexes
390
+ end
391
+ res = self[indexes]
392
+ return res if res.is_a? Integer
393
+ res.map { |i| self[i] }
394
+ end
395
+
396
+ def subset *indexes
397
+ if indexes.first.is_a? Integer
398
+ MultiIndex.from_tuples(indexes.map { |index| key(index) })
399
+ else
400
+ self[indexes].conform indexes
401
+ end
402
+ end
403
+
404
+ # Takes positional values and returns subset of the self
405
+ # capturing the indexes at mentioned positions
406
+ # @param [Array<Integer>] positional values
407
+ # @return [object] index object
408
+ # @example
409
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
410
+ # idx.at 0, 1
411
+ # # => #<Daru::MultiIndex(2x2)>
412
+ # # a one
413
+ # # two
414
+ def at *positions
415
+ positions = preprocess_positions(*positions)
416
+ validate_positions(*positions)
417
+ if positions.is_a? Integer
418
+ key(positions)
419
+ else
420
+ Daru::MultiIndex.from_tuples positions.map(&method(:key))
421
+ end
422
+ end
423
+
424
+ def add *indexes
425
+ Daru::MultiIndex.from_tuples to_a << indexes
426
+ end
427
+
428
+ def reorder(new_order)
429
+ from = to_a
430
+ self.class.from_tuples(new_order.map { |i| from[i] })
431
+ end
432
+
239
433
  def try_retrieve_from_integer int
240
- return retrieve_from_tuples([int]) if @levels[0].key?(int)
241
- int
434
+ @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
242
435
  end
243
436
 
244
437
  def retrieve_from_range range
@@ -279,17 +472,9 @@ module Daru
279
472
  raise ArgumentError,
280
473
  "Key #{index} is too large" if index >= @labels[0].size
281
474
 
282
- level_indexes =
283
- @labels.each_with_object([]) do |label, memo|
284
- memo << label[index]
285
- end
286
-
287
- tuple = []
288
- level_indexes.each_with_index do |level_index, i|
289
- tuple << @levels[i].keys[level_index]
290
- end
291
-
292
- tuple
475
+ @labels
476
+ .each_with_index
477
+ .map { |label, i| @levels[i].keys[label[index]] }
293
478
  end
294
479
 
295
480
  def dup
@@ -309,15 +494,13 @@ module Daru
309
494
  end
310
495
 
311
496
  def empty?
312
- @labels.flatten.empty? and @levels.all?(&:empty?)
497
+ @labels.flatten.empty? && @levels.all?(&:empty?)
313
498
  end
314
499
 
315
500
  def include? tuple
316
- tuple.flatten!
317
- tuple.each_with_index do |tup, i|
318
- return false unless @levels[i][tup]
319
- end
320
- true
501
+ return false unless tuple.is_a? Enumerable
502
+ tuple.flatten.each_with_index
503
+ .all? { |tup, i| @levels[i][tup] }
321
504
  end
322
505
 
323
506
  def size
@@ -342,8 +525,14 @@ module Daru
342
525
  Array.new(size) { |i| i }
343
526
  end
344
527
 
345
- def inspect
346
- "Daru::MultiIndex:#{object_id} (levels: #{levels}\nlabels: #{labels})"
528
+ def inspect threshold=20
529
+ "#<Daru::MultiIndex(#{size}x#{width})>\n" +
530
+ Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
531
+ end
532
+
533
+ def to_html
534
+ path = File.expand_path('../iruby/templates/multi_index.html.erb', __FILE__)
535
+ ERB.new(File.read(path).strip).result(binding)
347
536
  end
348
537
 
349
538
  # Provide a MultiIndex for sub vector produced
@@ -354,5 +543,219 @@ module Daru
354
543
  return self if input_indexes[0].is_a? Range
355
544
  drop_left_level input_indexes.size
356
545
  end
546
+
547
+ # Return tuples with nils in place of repeating values, like this:
548
+ #
549
+ # [:a , :bar, :one]
550
+ # [nil, nil , :two]
551
+ # [nil, :foo, :one]
552
+ #
553
+ def sparse_tuples
554
+ tuples = to_a
555
+ [tuples.first] + each_cons(2).map { |prev, cur|
556
+ left = cur.zip(prev).drop_while { |c, p| c == p }
557
+ [nil] * (cur.size - left.size) + left.map(&:first)
558
+ }
559
+ end
560
+ end
561
+
562
+ class CategoricalIndex < Index
563
+ # Create a categorical index object.
564
+ # @param indexes [Array<object>] array of indexes
565
+ # @return [Daru::CategoricalIndex] categorical index
566
+ # @example
567
+ # Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
568
+ # # => #<Daru::CategoricalIndex(5): {a, 1, a, 1, c}>
569
+ def initialize indexes
570
+ # Create a hash to map each category to positional indexes
571
+ categories = indexes.each_with_index.group_by(&:first)
572
+ @cat_hash = categories.map { |cat, group| [cat, group.map(&:last)] }.to_h
573
+
574
+ # Map each category to a unique integer for effective storage in @array
575
+ map_cat_int = categories.keys.each_with_index.to_h
576
+
577
+ # To link every instance to its category,
578
+ # it stores integer for every instance representing its category
579
+ @array = map_cat_int.values_at(*indexes)
580
+ end
581
+
582
+ # Duplicates the index object and return it
583
+ # @return [Daru::CategoricalIndex] duplicated index object
584
+ def dup
585
+ # Improve it by initializing index by hash
586
+ Daru::CategoricalIndex.new to_a
587
+ end
588
+
589
+ # Returns true if index or category is valid
590
+ # @param index [object] the index value to look for
591
+ # @return [true, false] true if index is included, false otherwise
592
+ def include? index
593
+ @cat_hash.include? index
594
+ end
595
+
596
+ # Returns array of categories
597
+ # @example
598
+ # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
599
+ # x.categories
600
+ # # => [:a, 1, :c]
601
+ def categories
602
+ @cat_hash.keys
603
+ end
604
+
605
+ # Returns positions given categories or positions
606
+ # @note If the argument is not a valid category it treats it as position
607
+ # value and return it as it is.
608
+ # @param [Array<object>] *indexes categories or positions
609
+ # @example
610
+ # x = Daru::CategoricalIndex.new [:a, 1, :a, 1, :c]
611
+ # x.pos :a, 1
612
+ # # => [0, 1, 2, 3]
613
+ def pos *indexes
614
+ positions = indexes.map do |index|
615
+ if include? index
616
+ @cat_hash[index]
617
+ elsif index.is_a?(Numeric) && index < @array.size
618
+ index
619
+ else
620
+ raise IndexError, "#{index.inspect} is neither a valid category"\
621
+ ' nor a valid position'
622
+ end
623
+ end
624
+
625
+ positions.flatten!
626
+ positions.size == 1 ? positions.first : positions.sort
627
+ end
628
+
629
+ # Returns index value from position
630
+ # @param pos [Integer] the position to look for
631
+ # @return [object] category corresponding to position
632
+ # @example
633
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
634
+ # idx.index_from_pos 1
635
+ # # => :b
636
+ def index_from_pos pos
637
+ cat_from_int @array[pos]
638
+ end
639
+
640
+ # Returns enumerator enumerating all index values in the order they occur
641
+ # @return [Enumerator] all index values
642
+ # @example
643
+ # idx = Daru::CategoricalIndex.new [:a, :a, :b]
644
+ # idx.each.to_a
645
+ # # => [:a, :a, :b]
646
+ def each
647
+ return enum_for(:each) unless block_given?
648
+ @array.each { |pos| yield cat_from_int pos }
649
+ self
650
+ end
651
+
652
+ # Compares two index objects. Returns true if every instance of category
653
+ # occur at the same position
654
+ # @param [Daru::CategoricalIndex] other index object to be checked against
655
+ # @return [true, false] true if other is similar to self
656
+ # @example
657
+ # a = Daru::CategoricalIndex.new [:a, :a, :b]
658
+ # b = Daru::CategoricalIndex.new [:b, :a, :a]
659
+ # a == b
660
+ # # => false
661
+ def == other
662
+ self.class == other.class &&
663
+ size == other.size &&
664
+ to_h == other.to_h
665
+ end
666
+
667
+ # Returns all the index values
668
+ # @return [Array] all index values
669
+ # @example
670
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
671
+ # idx.to_a
672
+ def to_a
673
+ each.to_a
674
+ end
675
+
676
+ # Returns hash table mapping category to positions at which they occur
677
+ # @return [Hash] hash table mapping category to array of positions
678
+ # @example
679
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
680
+ # idx.to_h
681
+ # # => {:a=>[0, 2], :b=>[1]}
682
+ def to_h
683
+ @cat_hash
684
+ end
685
+
686
+ # Returns size of the index object
687
+ # @return [Integer] total number of instances of all categories
688
+ # @example
689
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a]
690
+ # idx.size
691
+ # # => 3
692
+ def size
693
+ @array.size
694
+ end
695
+
696
+ # Returns true if index object is storing no category
697
+ # @return [true, false] true if index object is empty
698
+ # @example
699
+ # i = Daru::CategoricalIndex.new []
700
+ # # => #<Daru::CategoricalIndex(0): {}>
701
+ # i.empty?
702
+ # # => true
703
+ def empty?
704
+ @array.empty?
705
+ end
706
+
707
+ # Return subset given categories or positions
708
+ # @param [Array<object>] *indexes categories or positions
709
+ # @return [Daru::CategoricalIndex] subset of the self containing the
710
+ # mentioned categories or positions
711
+ # @example
712
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
713
+ # idx.subset :a, :b
714
+ # # => #<Daru::CategoricalIndex(4): {a, b, a, b}>
715
+ def subset *indexes
716
+ positions = pos(*indexes)
717
+ new_index = positions.map { |pos| index_from_pos pos }
718
+
719
+ Daru::CategoricalIndex.new new_index.flatten
720
+ end
721
+
722
+ # Takes positional values and returns subset of the self
723
+ # capturing the categories at mentioned positions
724
+ # @param [Array<Integer>] positional values
725
+ # @return [object] index object
726
+ # @example
727
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
728
+ # idx.at 0, 1
729
+ # # => #<Daru::CategoricalIndex(2): {a, b}>
730
+ def at *positions
731
+ positions = preprocess_positions(*positions)
732
+ validate_positions(*positions)
733
+ if positions.is_a? Integer
734
+ index_from_pos(positions)
735
+ else
736
+ Daru::CategoricalIndex.new positions.map(&method(:index_from_pos))
737
+ end
738
+ end
739
+
740
+ # Add specified index values to the index object
741
+ # @param [Array<object>] *indexes index values to add
742
+ # @return [Daru::CategoricalIndex] index object with added values
743
+ # @example
744
+ # idx = Daru::CategoricalIndex.new [:a, :b, :a, :b, :c]
745
+ # idx.add :d
746
+ # # => #<Daru::CategoricalIndex(6): {a, b, a, b, c, d}>
747
+ def add *indexes
748
+ Daru::CategoricalIndex.new(to_a + indexes)
749
+ end
750
+
751
+ private
752
+
753
+ def int_from_cat cat
754
+ @cat_hash.keys.index cat
755
+ end
756
+
757
+ def cat_from_int cat
758
+ @cat_hash.keys[cat]
759
+ end
357
760
  end
358
761
  end