daru 0.1.4.1 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -0
  3. data/.travis.yml +3 -0
  4. data/CONTRIBUTING.md +27 -3
  5. data/Guardfile +7 -0
  6. data/History.md +39 -1
  7. data/README.md +1 -1
  8. data/daru.gemspec +9 -2
  9. data/lib/daru.rb +4 -1
  10. data/lib/daru/accessors/gsl_wrapper.rb +93 -91
  11. data/lib/daru/accessors/nmatrix_wrapper.rb +109 -107
  12. data/lib/daru/category.rb +22 -15
  13. data/lib/daru/core/group_by.rb +13 -2
  14. data/lib/daru/core/merge.rb +37 -31
  15. data/lib/daru/core/query.rb +10 -2
  16. data/lib/daru/dataframe.rb +95 -34
  17. data/lib/daru/date_time/index.rb +15 -16
  18. data/lib/daru/date_time/offsets.rb +14 -11
  19. data/lib/daru/formatters/table.rb +2 -2
  20. data/lib/daru/index/categorical_index.rb +201 -0
  21. data/lib/daru/index/index.rb +289 -0
  22. data/lib/daru/index/multi_index.rb +266 -0
  23. data/lib/daru/maths/statistics/vector.rb +13 -9
  24. data/lib/daru/monkeys.rb +0 -7
  25. data/lib/daru/plotting/gruff/category.rb +1 -0
  26. data/lib/daru/plotting/gruff/dataframe.rb +3 -3
  27. data/lib/daru/plotting/nyaplot/dataframe.rb +1 -1
  28. data/lib/daru/vector.rb +36 -21
  29. data/lib/daru/version.rb +1 -1
  30. data/spec/accessors/array_wrapper_spec.rb +3 -0
  31. data/spec/accessors/{wrappers_spec.rb → gsl_wrapper_spec.rb} +0 -35
  32. data/spec/accessors/nmatrix_wrapper_spec.rb +32 -0
  33. data/spec/{categorical_spec.rb → category_spec.rb} +3 -0
  34. data/spec/core/group_by_spec.rb +17 -1
  35. data/spec/core/merge_spec.rb +38 -1
  36. data/spec/core/query_spec.rb +5 -0
  37. data/spec/dataframe_spec.rb +230 -57
  38. data/spec/date_time/offsets_spec.rb +84 -3
  39. data/spec/formatters/table_formatter_spec.rb +9 -0
  40. data/spec/index/categorical_index_spec.rb +2 -0
  41. data/spec/index/index_spec.rb +17 -2
  42. data/spec/{math → maths}/arithmetic/dataframe_spec.rb +0 -0
  43. data/spec/{math → maths}/arithmetic/vector_spec.rb +0 -0
  44. data/spec/{math → maths}/statistics/dataframe_spec.rb +1 -1
  45. data/spec/{math → maths}/statistics/vector_spec.rb +7 -12
  46. data/spec/plotting/gruff/category_spec.rb +44 -0
  47. data/spec/plotting/gruff/dataframe_spec.rb +84 -0
  48. data/spec/plotting/gruff/vector_spec.rb +70 -0
  49. data/spec/plotting/nyaplot/category_spec.rb +51 -0
  50. data/spec/plotting/{dataframe_spec.rb → nyaplot/dataframe_spec.rb} +0 -83
  51. data/spec/plotting/nyaplot/vector_spec.rb +66 -0
  52. data/spec/spec_helper.rb +3 -2
  53. data/spec/vector_spec.rb +68 -1
  54. metadata +53 -24
  55. data/lib/daru/index.rb +0 -761
  56. data/spec/plotting/vector_spec.rb +0 -230
@@ -0,0 +1,266 @@
1
+ module Daru
2
+ class MultiIndex < Index
3
+ def each(&block)
4
+ to_a.each(&block)
5
+ end
6
+
7
+ def map(&block)
8
+ to_a.map(&block)
9
+ end
10
+
11
+ attr_reader :labels
12
+
13
+ def levels
14
+ @levels.map(&:keys)
15
+ end
16
+
17
+ def initialize opts={}
18
+ labels = opts[:labels]
19
+ levels = opts[:levels]
20
+
21
+ raise ArgumentError, 'Must specify both labels and levels' unless labels && levels
22
+ raise ArgumentError, 'Labels and levels should be same size' if labels.size != levels.size
23
+ raise ArgumentError, 'Incorrect labels and levels' if incorrect_fields?(labels, levels)
24
+
25
+ @labels = labels
26
+ @levels = levels.map { |e| e.map.with_index.to_h }
27
+ end
28
+
29
+ def incorrect_fields?(_labels, levels)
30
+ levels[0].size # FIXME: without this exact call some specs are failing
31
+
32
+ levels.any? { |e| e.uniq.size != e.size }
33
+ end
34
+
35
+ private :incorrect_fields?
36
+
37
+ def self.from_arrays arrays
38
+ levels = arrays.map { |e| e.uniq.sort_by(&:to_s) }
39
+
40
+ labels = arrays.each_with_index.map do |arry, level_index|
41
+ level = levels[level_index]
42
+ arry.map { |lvl| level.index(lvl) }
43
+ end
44
+
45
+ MultiIndex.new labels: labels, levels: levels
46
+ end
47
+
48
+ def self.from_tuples tuples
49
+ from_arrays tuples.transpose
50
+ end
51
+
52
+ def self.try_from_tuples tuples
53
+ if tuples.respond_to?(:first) && tuples.first.is_a?(Array)
54
+ from_tuples(tuples)
55
+ else
56
+ nil
57
+ end
58
+ end
59
+
60
+ def [] *key
61
+ key.flatten!
62
+ case
63
+ when key[0].is_a?(Range)
64
+ retrieve_from_range(key[0])
65
+ when key[0].is_a?(Integer) && key.size == 1
66
+ try_retrieve_from_integer(key[0])
67
+ else
68
+ begin
69
+ retrieve_from_tuples key
70
+ rescue NoMethodError
71
+ raise IndexError, "Specified index #{key.inspect} do not exist"
72
+ end
73
+ end
74
+ end
75
+
76
+ def valid? *indexes
77
+ # FIXME: This is perhaps not a good method
78
+ pos(*indexes)
79
+ return true
80
+ rescue IndexError
81
+ return false
82
+ end
83
+
84
+ # Returns positions given indexes or positions
85
+ # @note If the argument is both a valid index and a valid position,
86
+ # it will be treated as a valid index
87
+ # @param [Array<object>] *indexes indexes or positions
88
+ # @example
89
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
90
+ # idx.pos :a
91
+ # # => [0, 1]
92
+ def pos *indexes
93
+ if indexes.first.is_a? Integer
94
+ return indexes.first if indexes.size == 1
95
+ return indexes
96
+ end
97
+ res = self[indexes]
98
+ return res if res.is_a? Integer
99
+ res.map { |i| self[i] }
100
+ end
101
+
102
+ def subset *indexes
103
+ if indexes.first.is_a? Integer
104
+ MultiIndex.from_tuples(indexes.map { |index| key(index) })
105
+ else
106
+ self[indexes].conform indexes
107
+ end
108
+ end
109
+
110
+ # Takes positional values and returns a subset of self
111
+ # capturing the indexes at the given positions
112
+ # @param [Array<Integer>] *positions positional values
113
+ # @return [object] index object
114
+ # @example
115
+ # idx = Daru::MultiIndex.from_tuples [[:a, :one], [:a, :two], [:b, :one], [:b, :two]]
116
+ # idx.at 0, 1
117
+ # # => #<Daru::MultiIndex(2x2)>
118
+ # # a one
119
+ # # two
120
+ def at *positions
121
+ positions = preprocess_positions(*positions)
122
+ validate_positions(*positions)
123
+ if positions.is_a? Integer
124
+ key(positions)
125
+ else
126
+ Daru::MultiIndex.from_tuples positions.map(&method(:key))
127
+ end
128
+ end
129
+
130
+ def add *indexes
131
+ Daru::MultiIndex.from_tuples to_a << indexes
132
+ end
133
+
134
+ def reorder(new_order)
135
+ from = to_a
136
+ self.class.from_tuples(new_order.map { |i| from[i] })
137
+ end
138
+
139
+ def try_retrieve_from_integer int
140
+ @levels[0].key?(int) ? retrieve_from_tuples([int]) : int
141
+ end
142
+
143
+ def retrieve_from_range range
144
+ MultiIndex.from_tuples(range.map { |index| key(index) })
145
+ end
146
+
147
+ def retrieve_from_tuples key
148
+ chosen = []
149
+
150
+ key.each_with_index do |k, depth|
151
+ level_index = @levels[depth][k]
152
+ raise IndexError, "Specified index #{key.inspect} do not exist" if level_index.nil?
153
+ label = @labels[depth]
154
+ chosen = find_all_indexes label, level_index, chosen
155
+ end
156
+
157
+ return chosen[0] if chosen.size == 1 && key.size == @levels.size
158
+ multi_index_from_multiple_selections(chosen)
159
+ end
160
+
161
+ def multi_index_from_multiple_selections chosen
162
+ MultiIndex.from_tuples(chosen.map { |e| key(e) })
163
+ end
164
+
165
+ def find_all_indexes label, level_index, chosen
166
+ if chosen.empty?
167
+ label.each_with_index
168
+ .select { |lbl, _| lbl == level_index }.map(&:last)
169
+ else
170
+ chosen.keep_if { |c| label[c] == level_index }
171
+ end
172
+ end
173
+
174
+ private :find_all_indexes, :multi_index_from_multiple_selections,
175
+ :retrieve_from_range, :retrieve_from_tuples
176
+
177
+ def key index
178
+ raise ArgumentError, "Key #{index} is too large" if index >= @labels[0].size
179
+
180
+ @labels
181
+ .each_with_index
182
+ .map { |label, i| @levels[i].keys[label[index]] }
183
+ end
184
+
185
+ def dup
186
+ MultiIndex.new levels: levels.dup, labels: labels
187
+ end
188
+
189
+ def drop_left_level by=1
190
+ MultiIndex.from_arrays to_a.transpose[by..-1]
191
+ end
192
+
193
+ def | other
194
+ MultiIndex.from_tuples(to_a | other.to_a)
195
+ end
196
+
197
+ def & other
198
+ MultiIndex.from_tuples(to_a & other.to_a)
199
+ end
200
+
201
+ def empty?
202
+ @labels.flatten.empty? && @levels.all?(&:empty?)
203
+ end
204
+
205
+ def include? tuple
206
+ return false unless tuple.is_a? Enumerable
207
+ tuple.flatten.each_with_index
208
+ .all? { |tup, i| @levels[i][tup] }
209
+ end
210
+
211
+ def size
212
+ @labels[0].size
213
+ end
214
+
215
+ def width
216
+ @levels.size
217
+ end
218
+
219
+ def == other
220
+ self.class == other.class &&
221
+ labels == other.labels &&
222
+ levels == other.levels
223
+ end
224
+
225
+ def to_a
226
+ (0...size).map { |e| key(e) }
227
+ end
228
+
229
+ def values
230
+ Array.new(size) { |i| i }
231
+ end
232
+
233
+ def inspect threshold=20
234
+ "#<Daru::MultiIndex(#{size}x#{width})>\n" +
235
+ Formatters::Table.format([], row_headers: sparse_tuples, threshold: threshold)
236
+ end
237
+
238
+ def to_html
239
+ path = File.expand_path('../../iruby/templates/multi_index.html.erb', __FILE__)
240
+ ERB.new(File.read(path).strip).result(binding)
241
+ end
242
+
243
+ # Provide the MultiIndex for the sub vector produced
244
+ #
245
+ # @param input_indexes [Array] the input by user to index the vector
246
+ # @return [Object] the MultiIndex object for the sub vector produced
247
+ def conform input_indexes
248
+ return self if input_indexes[0].is_a? Range
249
+ drop_left_level input_indexes.size
250
+ end
251
+
252
+ # Return tuples with nils in place of repeating values, like this:
253
+ #
254
+ # [:a , :bar, :one]
255
+ # [nil, nil , :two]
256
+ # [nil, :foo, :one]
257
+ #
258
+ def sparse_tuples
259
+ tuples = to_a
260
+ [tuples.first] + each_cons(2).map { |prev, cur|
261
+ left = cur.zip(prev).drop_while { |c, p| c == p }
262
+ [nil] * (cur.size - left.size) + left.map(&:first)
263
+ }
264
+ end
265
+ end
266
+ end
@@ -5,6 +5,8 @@ module Daru
5
5
  # the computationally intensive tasks.
6
6
  module Statistics
7
7
  module Vector # rubocop:disable Metrics/ModuleLength
8
+ extend Gem::Deprecate
9
+
8
10
  def mean
9
11
  @data.mean
10
12
  end
@@ -30,7 +32,8 @@ module Daru
30
32
  end
31
33
 
32
34
  def mode
33
- frequencies.max { |a,b| a[1]<=>b[1] }.first
35
+ mode = frequencies.to_h.select { |_,v| v == frequencies.max }.keys
36
+ mode.size > 1 ? Daru::Vector.new(mode) : mode.first
34
37
  end
35
38
 
36
39
  # Create a summary of count, mean, standard deviation, min and max of
@@ -88,25 +91,26 @@ module Daru
88
91
  end
89
92
 
90
93
  def frequencies
91
- @data.each_with_object(Hash.new(0)) do |element, hash|
92
- hash[element] += 1 unless element.nil?
93
- end
94
+ Daru::Vector.new(
95
+ @data.each_with_object(Hash.new(0)) do |element, hash|
96
+ hash[element] += 1 unless element.nil?
97
+ end
98
+ )
94
99
  end
95
100
 
96
- def freqs
97
- Daru::Vector.new(frequencies)
98
- end
101
+ alias_method :freqs, :frequencies
102
+ deprecate :freqs, :frequencies, 2016, 10
99
103
 
100
104
  def proportions
101
105
  len = size - count_values(*Daru::MISSING_VALUES)
102
- frequencies.each_with_object({}) do |(el, count), hash|
106
+ frequencies.to_h.each_with_object({}) do |(el, count), hash|
103
107
  hash[el] = count / len
104
108
  end
105
109
  end
106
110
 
107
111
  def ranked
108
112
  sum = 0
109
- r = frequencies.sort.each_with_object({}) do |(el, count), memo|
113
+ r = frequencies.to_h.sort.each_with_object({}) do |(el, count), memo|
110
114
  memo[el] = ((sum + 1) + (sum + count)).quo(2)
111
115
  sum += count
112
116
  end
@@ -62,11 +62,4 @@ class Object
62
62
  end
63
63
  end
64
64
  end
65
-
66
- module Daru
67
- class DataFrame
68
- # NOTE: This alias will soon be removed. Use to_h in all future work.
69
- alias :to_hash :to_h
70
- end
71
- end
72
65
  # :nocov:
@@ -38,6 +38,7 @@ module Daru
38
38
  def category_sidebar_plot size, method
39
39
  plot = Gruff::SideBar.new size
40
40
  plot.labels = {0 => (name.to_s || 'vector')}
41
+ method ||= :count
41
42
  frequencies(method).each_with_index do |data, index|
42
43
  plot.data index, data
43
44
  end
@@ -7,9 +7,9 @@ module Daru
7
7
  size = opts[:size] || 500
8
8
  x = extract_x_vector opts[:x]
9
9
  y = extract_y_vectors opts[:y]
10
- return plot_with_category(
11
- size, type, x, y, opts[:categorized]
12
- ) if opts[:categorized]
10
+ if opts[:categorized]
11
+ return plot_with_category(size, type, x, y, opts[:categorized])
12
+ end
13
13
  case type
14
14
  when :line, :bar, :scatter
15
15
  plot = send("#{type}_plot", size, x, y)
@@ -31,7 +31,7 @@ module Daru
31
31
  private
32
32
 
33
33
  def plot_without_category opts
34
- options = {type: :scatter}.merge(opts)
34
+ options = {type: :scatter}.merge(opts)
35
35
 
36
36
  plot = Nyaplot::Plot.new
37
37
  types = extract_option :type, options
@@ -190,9 +190,11 @@ module Daru
190
190
  case lib
191
191
  when :gruff, :nyaplot
192
192
  @plotting_library = lib
193
- extend Module.const_get(
194
- "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
195
- ) if Daru.send("has_#{lib}?".to_sym)
193
+ if Daru.send("has_#{lib}?".to_sym)
194
+ extend Module.const_get(
195
+ "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
196
+ )
197
+ end
196
198
  else
197
199
  raise ArguementError, "Plotting library #{lib} not supported. "\
198
200
  'Supported libraries are :nyaplot and :gruff'
@@ -291,7 +293,7 @@ module Daru
291
293
  update_position_cache
292
294
  end
293
295
 
294
- # Two vectors are equal if the have the exact same index values corresponding
296
+ # Two vectors are equal if they have the exact same index values corresponding
295
297
  # with the exact same elements. Name is ignored.
296
298
  def == other
297
299
  case other
@@ -353,6 +355,7 @@ module Daru
353
355
  mod.apply_scalar_operator operator, @data,other
354
356
  end
355
357
  end
358
+ alias_method operator, method if operator != :== && operator != :!=
356
359
  end
357
360
  alias :gt :mt
358
361
  alias :gteq :mteq
@@ -593,15 +596,6 @@ module Daru
593
596
  end
594
597
  }
595
598
 
596
- def resort_index vector_index, opts
597
- if block_given?
598
- vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
599
- else
600
- vector_index.sort(&DEFAULT_SORTER)
601
- end
602
- .tap { |res| res.reverse! unless opts[:ascending] }
603
- end
604
-
605
599
  # Just sort the data and get an Array in return using Enumerable#sort.
606
600
  # Non-destructive.
607
601
  # :nocov:
@@ -834,8 +828,10 @@ module Daru
834
828
  # # [
835
829
  # # [1, 2, 3] ]
836
830
  def to_nmatrix axis=:horizontal
837
- raise ArgumentError, 'Can not convert to nmatrix'\
838
- 'because the vector is numeric' unless numeric? && !include?(nil)
831
+ unless numeric? && !include?(nil)
832
+ raise ArgumentError, 'Can not convert to nmatrix'\
833
+ 'because the vector is numeric'
834
+ end
839
835
 
840
836
  case axis
841
837
  when :horizontal
@@ -973,7 +969,8 @@ module Daru
973
969
  # # c 3
974
970
  def reorder! order
975
971
  @index = @index.reorder order
976
- @data = order.map { |i| @data[i] }
972
+ data_array = order.map { |i| @data[i] }
973
+ @data = cast_vector_to @dtype, data_array, @nm_dtype
977
974
  update_position_cache
978
975
  self
979
976
  end
@@ -990,11 +987,16 @@ module Daru
990
987
  end
991
988
 
992
989
  def index= idx
993
- raise ArgumentError,
994
- "Size of supplied index #{index.size} does not match size of DataFrame" if
995
- idx.size != size
996
- raise ArgumentError, 'Can only assign type Index and its subclasses.' unless
997
- idx.is_a?(Daru::Index)
990
+ idx = Index.coerce idx
991
+
992
+ if idx.size != size
993
+ raise ArgumentError,
994
+ "Size of supplied index #{idx.size} does not match size of Vector"
995
+ end
996
+
997
+ unless idx.is_a?(Daru::Index)
998
+ raise ArgumentError, 'Can only assign type Index and its subclasses.'
999
+ end
998
1000
 
999
1001
  @index = idx
1000
1002
  self
@@ -1328,6 +1330,10 @@ module Daru
1328
1330
  end
1329
1331
  end
1330
1332
 
1333
+ def group_by(*args)
1334
+ to_df.group_by(*args)
1335
+ end
1336
+
1331
1337
  private
1332
1338
 
1333
1339
  def nil_positions
@@ -1538,5 +1544,14 @@ module Daru
1538
1544
  @nil_positions = nil
1539
1545
  @nan_positions = nil
1540
1546
  end
1547
+
1548
+ def resort_index vector_index, opts
1549
+ if block_given?
1550
+ vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
1551
+ else
1552
+ vector_index.sort(&DEFAULT_SORTER)
1553
+ end
1554
+ .tap { |res| res.reverse! unless opts[:ascending] }
1555
+ end
1541
1556
  end
1542
1557
  end