daru 0.1.4.1 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -0
  3. data/.travis.yml +3 -0
  4. data/CONTRIBUTING.md +27 -3
  5. data/Guardfile +7 -0
  6. data/History.md +39 -1
  7. data/README.md +1 -1
  8. data/daru.gemspec +9 -2
  9. data/lib/daru.rb +4 -1
  10. data/lib/daru/accessors/gsl_wrapper.rb +93 -91
  11. data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
  12. data/lib/daru/category.rb +22 -15
  13. data/lib/daru/core/group_by.rb +13 -2
  14. data/lib/daru/core/merge.rb +37 -31
  15. data/lib/daru/core/query.rb +10 -2
  16. data/lib/daru/dataframe.rb +95 -34
  17. data/lib/daru/date_time/index.rb +15 -16
  18. data/lib/daru/date_time/offsets.rb +14 -11
  19. data/lib/daru/formatters/table.rb +2 -2
  20. data/lib/daru/index/categorical_index.rb +201 -0
  21. data/lib/daru/index/index.rb +289 -0
  22. data/lib/daru/index/multi_index.rb +266 -0
  23. data/lib/daru/maths/statistics/vector.rb +13 -9
  24. data/lib/daru/monkeys.rb +0 -7
  25. data/lib/daru/plotting/gruff/category.rb +1 -0
  26. data/lib/daru/plotting/gruff/dataframe.rb +3 -3
  27. data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
  28. data/lib/daru/vector.rb +36 -21
  29. data/lib/daru/version.rb +1 -1
  30. data/spec/accessors/array_wrapper_spec.rb +3 -0
  31. data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
  32. data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
  33. data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
  34. data/spec/core/group_by_spec.rb +17 -1
  35. data/spec/core/merge_spec.rb +38 -1
  36. data/spec/core/query_spec.rb +5 -0
  37. data/spec/dataframe_spec.rb +230 -57
  38. data/spec/date_time/offsets_spec.rb +84 -3
  39. data/spec/formatters/table_formatter_spec.rb +9 -0
  40. data/spec/index/categorical_index_spec.rb +2 -0
  41. data/spec/index/index_spec.rb +17 -2
  42. data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
  43. data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
  44. data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
  45. data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
  46. data/spec/plotting/gruff/category_spec.rb +44 -0
  47. data/spec/plotting/gruff/dataframe_spec.rb +84 -0
  48. data/spec/plotting/gruff/vector_spec.rb +70 -0
  49. data/spec/plotting/nyaplot/category_spec.rb +51 -0
  50. data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
  51. data/spec/plotting/nyaplot/vector_spec.rb +66 -0
  52. data/spec/spec_helper.rb +3 -2
  53. data/spec/vector_spec.rb +68 -1
  54. metadata +53 -24
  55. data/lib/daru/index.rb +0 -761
  56. data/spec/plotting/vector_spec.rb +0 -230
@@ -0,0 +1,266 @@
1
+ module Daru
2
+ class MultiIndex < Index
3
+ def each(&block)
4
+ to_a.each(&block)
5
+ end
6
+
7
+ def map(&block)
8
+ to_a.map(&block)
9
+ end
10
+
11
+ attr_reader :labels
12
+
13
+ def levels
14
+ @levels.map(&:keys)
15
+ end
16
+
17
+ def initialize opts={}
18
+ labels = opts[:labels]
19
+ levels = opts[:levels]
20
+
21
+ raise ArgumentError, 'Must specify both labels and levels' unless labels && levels
22
+ raise ArgumentError, 'Labels and levels should be same size' if labels.size != levels.size
23
+ raise ArgumentError, 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
24
+
25
+ @labels = labels
26
+ @levels = levels.map { |e| e.map.with_index.to_h }
27
+ end
28
+
29
+ def incorrect_fields?(_labels, levels)
30
+ levels[0].size # FIXME: without this exact call some specs are failing
31
+
32
+ levels.any? { |e| e.uniq.size != e.size }
33
+ end
34
+
35
+ private :incorrect_fields?
36
+
37
+ def self.from_arrays arrays
38
+ levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
39
+
40
+ labels = arrays.each_with_index.map do |arry, level_index|
41
+ level = levels[level_index]
42
+ arry.map { |lvl| level.index(lvl) }
43
+ end
44
+
45
+ MultiIndex.new labels: labels, levels: levels
46
+ end
47
+
48
+ def self.from_tuples tuples
49
+ from_arrays tuples.transpose
50
+ end
51
+
52
+ def self.try_from_tuples tuples
53
+ if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
54
+ from_tuples(tuples)
55
+ else
56
+ nil
57
+ end
58
+ end
59
+
60
+ def [] *key
61
+ key.flatten!
62
+ case
63
+ when key[0].is_a?(Range)
64
+ retrieve_from_range(key[0])
65
+ when key[0].is_a?(Integer) && key.size == 1
66
+ try_retrieve_from_integer(key[0])
67
+ else
68
+ begin
69
+ retrieve_from_tuples key
70
+ rescue NoMethodError
71
+ raise IndexError, "Specified index #{key.inspect} do not exist"
72
+ end
73
+ end
74
+ end
75
+
76
+ def valid? *indexes
77
+ # FIXME: This is perhaps not a good method
78
+ pos(*indexes)
79
+ return true
80
+ rescue IndexError
81
+ return false
82
+ end
83
+
84
+ # Returns positions given indexes or positions
85
+ # @note If the argument is both a valid index and a valid position,
86
+ # it will be treated as a valid index
87
+ # @param [Array<object>] *indexes indexes or positions
88
+ # @example
89
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
90
+ # idx.pos :a
91
+ # # => [0, 1]
92
+ def pos *indexes
93
+ if indexes.first.is_a? Integer
94
+ return indexes.first if indexes.size == 1
95
+ return indexes
96
+ end
97
+ res = self[indexes]
98
+ return res if res.is_a? Integer
99
+ res.map { |i| self[i] }
100
+ end
101
+
102
+ def subset *indexes
103
+ if indexes.first.is_a? Integer
104
+ MultiIndex.from_tuples(indexes.map { |index| key(index) })
105
+ else
106
+ self[indexes].conform indexes
107
+ end
108
+ end
109
+
110
+ # Takes positional values and returns a subset of self
111
+ # containing the indexes at the given positions
112
+ # @param positions [Array<Integer>] positional values
113
+ # @return [object] index object
114
+ # @example
115
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
116
+ # idx.at 0, 1
117
+ # # => #<Daru::MultiIndex(2x2)>
118
+ # # a one
119
+ # # two
120
+ def at *positions
121
+ positions = preprocess_positions(*positions)
122
+ validate_positions(*positions)
123
+ if positions.is_a? Integer
124
+ key(positions)
125
+ else
126
+ Daru::MultiIndex.from_tuples positions.map(&method(:key))
127
+ end
128
+ end
129
+
130
+ def add *indexes
131
+ Daru::MultiIndex.from_tuples to_a << indexes
132
+ end
133
+
134
+ def reorder(new_order)
135
+ from = to_a
136
+ self.class.from_tuples(new_order.map { |i| from[i] })
137
+ end
138
+
139
+ def try_retrieve_from_integer int
140
+ @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
141
+ end
142
+
143
+ def retrieve_from_range range
144
+ MultiIndex.from_tuples(range.map { |index| key(index) })
145
+ end
146
+
147
+ def retrieve_from_tuples key
148
+ chosen = []
149
+
150
+ key.each_with_index do |k, depth|
151
+ level_index = @levels[depth][k]
152
+ raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
153
+ label = @labels[depth]
154
+ chosen = find_all_indexes label, level_index, chosen
155
+ end
156
+
157
+ return chosen[0] if chosen.size == 1 && key.size == @levels.size
158
+ multi_index_from_multiple_selections(chosen)
159
+ end
160
+
161
+ def multi_index_from_multiple_selections chosen
162
+ MultiIndex.from_tuples(chosen.map { |e| key(e) })
163
+ end
164
+
165
+ def find_all_indexes label, level_index, chosen
166
+ if chosen.empty?
167
+ label.each_with_index
168
+ .select { |lbl, _| lbl == level_index }.map(&:last)
169
+ else
170
+ chosen.keep_if { |c| label[c] == level_index }
171
+ end
172
+ end
173
+
174
+ private :find_all_indexes, :multi_index_from_multiple_selections,
175
+ :retrieve_from_range, :retrieve_from_tuples
176
+
177
+ def key index
178
+ raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
179
+
180
+ @labels
181
+ .each_with_index
182
+ .map { |label, i| @levels[i].keys[label[index]] }
183
+ end
184
+
185
+ def dup
186
+ MultiIndex.new levels: levels.dup, labels: labels
187
+ end
188
+
189
+ def drop_left_level by=1
190
+ MultiIndex.from_arrays to_a.transpose[by..-1]
191
+ end
192
+
193
+ def | other
194
+ MultiIndex.from_tuples(to_a | other.to_a)
195
+ end
196
+
197
+ def & other
198
+ MultiIndex.from_tuples(to_a & other.to_a)
199
+ end
200
+
201
+ def empty?
202
+ @labels.flatten.empty? && @levels.all?(&:empty?)
203
+ end
204
+
205
+ def include? tuple
206
+ return false unless tuple.is_a? Enumerable
207
+ tuple.flatten.each_with_index
208
+ .all? { |tup, i| @levels[i][tup] }
209
+ end
210
+
211
+ def size
212
+ @labels[0].size
213
+ end
214
+
215
+ def width
216
+ @levels.size
217
+ end
218
+
219
+ def == other
220
+ self.class == other.class &&
221
+ labels == other.labels &&
222
+ levels == other.levels
223
+ end
224
+
225
+ def to_a
226
+ (0...size).map { |e| key(e) }
227
+ end
228
+
229
+ def values
230
+ Array.new(size) { |i| i }
231
+ end
232
+
233
+ def inspect threshold=20
234
+ "#<Daru::MultiIndex(#{size}x#{width})>\n" +
235
+ Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
236
+ end
237
+
238
+ def to_html
239
+ path = File.expand_path('../../iruby/templates/multi_index.html.erb', __FILE__)
240
+ ERB.new(File.read(path).strip).result(binding)
241
+ end
242
+
243
+ # Provide a MultiIndex for sub vector produced
244
+ #
245
+ # @param input_indexes [Array] the input by user to index the vector
246
+ # @return [Object] the MultiIndex object for sub vector produced
247
+ def conform input_indexes
248
+ return self if input_indexes[0].is_a? Range
249
+ drop_left_level input_indexes.size
250
+ end
251
+
252
+ # Return tuples with nils in place of repeating values, like this:
253
+ #
254
+ # [:a , :bar, :one]
255
+ # [nil, nil , :two]
256
+ # [nil, :foo, :one]
257
+ #
258
+ def sparse_tuples
259
+ tuples = to_a
260
+ [tuples.first] + each_cons(2).map { |prev, cur|
261
+ left = cur.zip(prev).drop_while { |c, p| c == p }
262
+ [nil] * (cur.size - left.size) + left.map(&:first)
263
+ }
264
+ end
265
+ end
266
+ end
@@ -5,6 +5,8 @@ module Daru
5
5
  # the computationally intensive tasks.
6
6
  module Statistics
7
7
  module Vector # rubocop:disable Metrics/ModuleLength
8
+ extend Gem::Deprecate
9
+
8
10
  def mean
9
11
  @data.mean
10
12
  end
@@ -30,7 +32,8 @@ module Daru
30
32
  end
31
33
 
32
34
  def mode
33
- frequencies.max { |a,b| a[1]<=>b[1] }.first
35
+ mode = frequencies.to_h.select { |_,v| v == frequencies.max }.keys
36
+ mode.size > 1 ? Daru::Vector.new(mode) : mode.first
34
37
  end
35
38
 
36
39
  # Create a summary of count, mean, standard deviation, min and max of
@@ -88,25 +91,26 @@ module Daru
88
91
  end
89
92
 
90
93
  def frequencies
91
- @data.each_with_object(Hash.new(0)) do |element, hash|
92
- hash[element] += 1 unless element.nil?
93
- end
94
+ Daru::Vector.new(
95
+ @data.each_with_object(Hash.new(0)) do |element, hash|
96
+ hash[element] += 1 unless element.nil?
97
+ end
98
+ )
94
99
  end
95
100
 
96
- def freqs
97
- Daru::Vector.new(frequencies)
98
- end
101
+ alias_method :freqs, :frequencies
102
+ deprecate :freqs, :frequencies, 2016, 10
99
103
 
100
104
  def proportions
101
105
  len = size - count_values(*Daru::MISSING_VALUES)
102
- frequencies.each_with_object({}) do |(el, count), hash|
106
+ frequencies.to_h.each_with_object({}) do |(el, count), hash|
103
107
  hash[el] = count / len
104
108
  end
105
109
  end
106
110
 
107
111
  def ranked
108
112
  sum = 0
109
- r = frequencies.sort.each_with_object({}) do |(el, count), memo|
113
+ r = frequencies.to_h.sort.each_with_object({}) do |(el, count), memo|
110
114
  memo[el] = ((sum + 1) + (sum + count)).quo(2)
111
115
  sum += count
112
116
  end
@@ -62,11 +62,4 @@ class Object
62
62
  end
63
63
  end
64
64
  end
65
-
66
- module Daru
67
- class DataFrame
68
- # NOTE: This alias will soon be removed. Use to_h in all future work.
69
- alias :to_hash :to_h
70
- end
71
- end
72
65
  # :nocov:
@@ -38,6 +38,7 @@ module Daru
38
38
  def category_sidebar_plot size, method
39
39
  plot = Gruff::SideBar.new size
40
40
  plot.labels = {0 => (name.to_s || 'vector')}
41
+ method ||= :count
41
42
  frequencies(method).each_with_index do |data, index|
42
43
  plot.data index, data
43
44
  end
@@ -7,9 +7,9 @@ module Daru
7
7
  size = opts[:size] || 500
8
8
  x = extract_x_vector opts[:x]
9
9
  y = extract_y_vectors opts[:y]
10
- return plot_with_category(
11
- size, type, x, y, opts[:categorized]
12
- ) if opts[:categorized]
10
+ if opts[:categorized]
11
+ return plot_with_category(size, type, x, y, opts[:categorized])
12
+ end
13
13
  case type
14
14
  when :line, :bar, :scatter
15
15
  plot = send("#{type}_plot", size, x, y)
@@ -31,7 +31,7 @@ module Daru
31
31
  private
32
32
 
33
33
  def plot_without_category opts
34
- options = {type: :scatter}.merge(opts)
34
+ options = {type: :scatter}.merge(opts)
35
35
 
36
36
  plot = Nyaplot::Plot.new
37
37
  types = extract_option :type, options
@@ -190,9 +190,11 @@ module Daru
190
190
  case lib
191
191
  when :gruff, :nyaplot
192
192
  @plotting_library = lib
193
- extend Module.const_get(
194
- "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
195
- ) if Daru.send("has_#{lib}?".to_sym)
193
+ if Daru.send("has_#{lib}?".to_sym)
194
+ extend Module.const_get(
195
+ "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
196
+ )
197
+ end
196
198
  else
197
199
  raise ArguementError, "Plotting library #{lib} not supported. "\
198
200
  'Supported libraries are :nyaplot and :gruff'
@@ -291,7 +293,7 @@ module Daru
291
293
  update_position_cache
292
294
  end
293
295
 
294
- # Two vectors are equal if the have the exact same index values corresponding
296
+ # Two vectors are equal if they have the exact same index values corresponding
295
297
  # with the exact same elements. Name is ignored.
296
298
  def == other
297
299
  case other
@@ -353,6 +355,7 @@ module Daru
353
355
  mod.apply_scalar_operator operator, @data,other
354
356
  end
355
357
  end
358
+ alias_method operator, method if operator != :== && operator != :!=
356
359
  end
357
360
  alias :gt :mt
358
361
  alias :gteq :mteq
@@ -593,15 +596,6 @@ module Daru
593
596
  end
594
597
  }
595
598
 
596
- def resort_index vector_index, opts
597
- if block_given?
598
- vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
599
- else
600
- vector_index.sort(&DEFAULT_SORTER)
601
- end
602
- .tap { |res| res.reverse! unless opts[:ascending] }
603
- end
604
-
605
599
  # Just sort the data and get an Array in return using Enumerable#sort.
606
600
  # Non-destructive.
607
601
  # :nocov:
@@ -834,8 +828,10 @@ module Daru
834
828
  # # [
835
829
  # # [1, 2, 3] ]
836
830
  def to_nmatrix axis=:horizontal
837
- raise ArgumentError, 'Can not convert to nmatrix'\
838
- 'because the vector is numeric' unless numeric? && !include?(nil)
831
+ unless numeric? && !include?(nil)
832
+ raise ArgumentError, 'Can not convert to nmatrix'\
833
+ 'because the vector is numeric'
834
+ end
839
835
 
840
836
  case axis
841
837
  when :horizontal
@@ -973,7 +969,8 @@ module Daru
973
969
  # # c 3
974
970
  def reorder! order
975
971
  @index = @index.reorder order
976
- @data = order.map { |i| @data[i] }
972
+ data_array = order.map { |i| @data[i] }
973
+ @data = cast_vector_to @dtype, data_array, @nm_dtype
977
974
  update_position_cache
978
975
  self
979
976
  end
@@ -990,11 +987,16 @@ module Daru
990
987
  end
991
988
 
992
989
  def index= idx
993
- raise ArgumentError,
994
- "Size of supplied index #{index.size} does not match size of DataFrame" if
995
- idx.size != size
996
- raise ArgumentError, 'Can only assign type Index and its subclasses.' unless
997
- idx.is_a?(Daru::Index)
990
+ idx = Index.coerce idx
991
+
992
+ if idx.size != size
993
+ raise ArgumentError,
994
+ "Size of supplied index #{idx.size} does not match size of Vector"
995
+ end
996
+
997
+ unless idx.is_a?(Daru::Index)
998
+ raise ArgumentError, 'Can only assign type Index and its subclasses.'
999
+ end
998
1000
 
999
1001
  @index = idx
1000
1002
  self
@@ -1328,6 +1330,10 @@ module Daru
1328
1330
  end
1329
1331
  end
1330
1332
 
1333
+ def group_by(*args)
1334
+ to_df.group_by(*args)
1335
+ end
1336
+
1331
1337
  private
1332
1338
 
1333
1339
  def nil_positions
@@ -1538,5 +1544,14 @@ module Daru
1538
1544
  @nil_positions = nil
1539
1545
  @nan_positions = nil
1540
1546
  end
1547
+
1548
+ def resort_index vector_index, opts
1549
+ if block_given?
1550
+ vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
1551
+ else
1552
+ vector_index.sort(&DEFAULT_SORTER)
1553
+ end
1554
+ .tap { |res| res.reverse! unless opts[:ascending] }
1555
+ end
1541
1556
  end
1542
1557
  end