daru 0.1.3.1 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -0,0 +1,46 @@
1
+ module Daru
2
+ module Plotting
3
+ module Vector
4
+ module NyaplotLibrary
5
+ # Plots a Vector with Nyaplot on IRuby using the given options. Yields the
6
+ # plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
7
+ # to the block, which can be used for setting various options as per the
8
+ # Nyaplot API.
9
+ #
10
+ # == Options
11
+ # type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
12
+ #
13
+ # == Usage
14
+ # vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
15
+ # vector.plot(type: :bar) do |plot|
16
+ # plot.title "My first plot"
17
+ # plot.width 1200
18
+ # end
19
+ def plot opts={}
20
+ options = {
21
+ type: :scatter
22
+ }.merge(opts)
23
+
24
+ x_axis = options[:type] == :scatter ? Array.new(size) { |i| i } : @index.to_a
25
+ plot = Nyaplot::Plot.new
26
+ diagram = create_diagram plot, options[:type], x_axis
27
+
28
+ yield plot, diagram if block_given?
29
+
30
+ plot.show
31
+ end
32
+
33
+ private
34
+
35
+ def create_diagram plot, type, x_axis
36
+ case type
37
+ when :box, :histogram
38
+ plot.add(type, @data.to_a)
39
+ else
40
+ plot.add(type, x_axis, @data.to_a)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,16 +1,98 @@
1
1
  require 'daru/maths/arithmetic/vector.rb'
2
2
  require 'daru/maths/statistics/vector.rb'
3
- require 'daru/plotting/vector.rb'
3
+ require 'daru/plotting/gruff.rb'
4
+ require 'daru/plotting/nyaplot.rb'
4
5
  require 'daru/accessors/array_wrapper.rb'
5
6
  require 'daru/accessors/nmatrix_wrapper.rb'
6
7
  require 'daru/accessors/gsl_wrapper.rb'
8
+ require 'daru/category.rb'
7
9
 
8
10
  module Daru
9
- class Vector
11
+ class Vector # rubocop:disable Metrics/ClassLength
10
12
  include Enumerable
11
13
  include Daru::Maths::Arithmetic::Vector
12
14
  include Daru::Maths::Statistics::Vector
13
- include Daru::Plotting::Vector if Daru.has_nyaplot?
15
+ extend Gem::Deprecate
16
+
17
+ class << self
18
+ # Create a new vector by specifying the size and an optional value
19
+ # and block to generate values.
20
+ #
21
+ # == Description
22
+ #
23
+ # The *new_with_size* class method lets you create a Daru::Vector
24
+ # by specifying the size as the argument. The optional block, if
25
+ # supplied, is run once for populating each element in the Vector.
26
+ #
27
+ # The result of each run of the block is the value that is ultimately
28
+ # assigned to that position in the Vector.
29
+ #
30
+ # == Options
31
+ # :value
32
+ # All the rest like .new
33
+ def new_with_size n, opts={}, &block
34
+ value = opts.delete :value
35
+ block ||= ->(_) { value }
36
+ Daru::Vector.new Array.new(n, &block), opts
37
+ end
38
+
39
+ # Create a vector using (almost) any object
40
+ # * Array: flattened
41
+ # * Range: transformed using to_a
42
+ # * Daru::Vector
43
+ # * Numeric and string values
44
+ #
45
+ # == Description
46
+ #
47
+ # The `Vector.[]` class method creates a vector from almost any
48
+ # object that has a `#to_a` method defined on it. It is similar
49
+ # to R's `c` method.
50
+ #
51
+ # == Usage
52
+ #
53
+ # a = Daru::Vector[1,2,3,4,6..10]
54
+ # #=>
55
+ # # <Daru::Vector:99448510 @name = nil @size = 9 >
56
+ # # nil
57
+ # # 0 1
58
+ # # 1 2
59
+ # # 2 3
60
+ # # 3 4
61
+ # # 4 6
62
+ # # 5 7
63
+ # # 6 8
64
+ # # 7 9
65
+ # # 8 10
66
+ def [](*indexes)
67
+ values = indexes.map do |a|
68
+ a.respond_to?(:to_a) ? a.to_a : a
69
+ end.flatten
70
+ Daru::Vector.new(values)
71
+ end
72
+
73
+ def _load(data) # :nodoc:
74
+ h = Marshal.load(data)
75
+ Daru::Vector.new(h[:data],
76
+ index: h[:index],
77
+ name: h[:name],
78
+ dtype: h[:dtype], missing_values: h[:missing_values])
79
+ end
80
+
81
+ def coerce(data, options={})
82
+ case data
83
+ when Daru::Vector
84
+ data
85
+ when Array, Hash
86
+ new(data, options)
87
+ else
88
+ raise ArgumentError, "Can't coerce #{data.class} to #{self}"
89
+ end
90
+ end
91
+ end
92
+
93
+ def size
94
+ @data.size
95
+ end
14
96
 
15
97
  def each(&block)
16
98
  return to_enum(:each) unless block_given?
@@ -26,17 +108,17 @@ module Daru
26
108
  self
27
109
  end
28
110
 
29
- def each_with_index
111
+ def each_with_index &block
30
112
  return to_enum(:each_with_index) unless block_given?
31
113
 
32
- @index.each { |i| yield(self[i], i) }
114
+ @data.to_a.zip(@index.to_a).each(&block)
115
+
33
116
  self
34
117
  end
35
118
 
36
119
  def map!(&block)
37
120
  return to_enum(:map!) unless block_given?
38
121
  @data.map!(&block)
39
- update
40
122
  self
41
123
  end
42
124
 
@@ -44,8 +126,6 @@ module Daru
44
126
  attr_reader :name
45
127
  # The row index. Can be either Daru::Index or Daru::MultiIndex.
46
128
  attr_reader :index
47
- # The total number of elements of the vector.
48
- attr_reader :size
49
129
  # The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
50
130
  attr_reader :dtype
51
131
  # If the dtype is :nmatrix, this attribute represents the data type of the
@@ -54,13 +134,16 @@ module Daru
54
134
  attr_reader :nm_dtype
55
135
  # An Array of the positions in the vector that are being treated as 'missing'.
56
136
  attr_reader :missing_positions
137
+ deprecate :missing_positions, :indexes, 2016, 10
57
138
  # Store a hash of labels for values. Supplementary only. Recommend using index
58
139
  # for proper usage.
59
140
  attr_accessor :labels
60
141
  # Store vector data in an array
61
142
  attr_reader :data
62
- # Attach arbitrary metadata to vector (usu a hash)
63
- attr_accessor :metadata
143
+ # Plotting library being used for this vector
144
+ attr_reader :plotting_library
145
+ # TODO: Make private.
146
+ attr_reader :nil_positions, :nan_positions
64
147
 
65
148
  # Create a Vector object.
66
149
  #
@@ -93,102 +176,27 @@ module Daru
93
176
  # vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
94
177
  # vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
95
178
  def initialize source, opts={}
96
- index = nil
97
- if source.is_a?(Hash)
98
- index = source.keys
99
- source = source.values
179
+ if opts[:type] == :category
180
+ # Initialize category type vector
181
+ extend Daru::Category
182
+ initialize_category source, opts
100
183
  else
101
- index = opts[:index]
102
- source ||= []
184
+ # Initialize non-category type vector
185
+ initialize_vector source, opts
103
186
  end
104
- name = opts[:name]
105
- set_name name
106
-
107
- @metadata = opts[:metadata] || {}
108
-
109
- @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
110
- @index = try_create_index(index || @data.size)
111
-
112
- if @index.size > @data.size
113
- cast(dtype: :array) # NM with nils seg faults
114
- (@index.size - @data.size).times { @data << nil }
115
- elsif @index.size < @data.size
116
- raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
117
- end
118
-
119
- @possibly_changed_type = true
120
- set_missing_values opts[:missing_values]
121
- set_missing_positions
122
- set_size
123
187
  end
124
188
 
125
- # Create a new vector by specifying the size and an optional value
126
- # and block to generate values.
127
- #
128
- # == Description
129
- #
130
- # The *new_with_size* class method lets you create a Daru::Vector
131
- # by specifying the size as the argument. The optional block, if
132
- # supplied, is run once for populating each element in the Vector.
133
- #
134
- # The result of each run of the block is the value that is ultimately
135
- # assigned to that position in the Vector.
136
- #
137
- # == Options
138
- # :value
139
- # All the rest like .new
140
- def self.new_with_size n, opts={}, &block
141
- value = opts[:value]
142
- opts.delete :value
143
- if block
144
- Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
189
+ def plotting_library= lib
190
+ case lib
191
+ when :gruff, :nyaplot
192
+ @plotting_library = lib
193
+ extend Module.const_get(
194
+ "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
195
+ ) if Daru.send("has_#{lib}?".to_sym)
145
196
  else
146
- Daru::Vector.new Array.new(n) { value }, opts
147
- end
148
- end
149
-
150
- # Create a vector using (almost) any object
151
- # * Array: flattened
152
- # * Range: transformed using to_a
153
- # * Daru::Vector
154
- # * Numeric and string values
155
- #
156
- # == Description
157
- #
158
- # The `Vector.[]` class method creates a vector from almost any
159
- # object that has a `#to_a` method defined on it. It is similar
160
- # to R's `c` method.
161
- #
162
- # == Usage
163
- #
164
- # a = Daru::Vector[1,2,3,4,6..10]
165
- # #=>
166
- # # <Daru::Vector:99448510 @name = nil @size = 9 >
167
- # # nil
168
- # # 0 1
169
- # # 1 2
170
- # # 2 3
171
- # # 3 4
172
- # # 4 6
173
- # # 5 7
174
- # # 6 8
175
- # # 7 9
176
- # # 8 10
177
- def self.[](*args)
178
- values = []
179
- args.each do |a|
180
- case a
181
- when Array
182
- values.concat a.flatten
183
- when Daru::Vector
184
- values.concat a.to_a
185
- when Range
186
- values.concat a.to_a
187
- else
188
- values << a
189
- end
197
+ raise ArguementError, "Plotting library #{lib} not supported. "\
198
+ 'Supported libraries are :nyaplot and :gruff'
190
199
  end
191
- Daru::Vector.new(values)
192
200
  end
193
201
 
194
202
  # Get one or more elements with specified index or a range.
@@ -203,19 +211,63 @@ module Daru
203
211
  # # For vectors employing hierarchical multi index
204
212
  #
205
213
  def [](*input_indexes)
206
- # Get a proper index object
207
- indexes = @index[*input_indexes]
214
+ # Get array of positions indexes
215
+ positions = @index.pos(*input_indexes)
208
216
 
209
217
  # If one object is asked return it
210
- return @data[indexes] if indexes.is_a? Numeric
218
+ return @data[positions] if positions.is_a? Numeric
211
219
 
212
- # Form a new Vector using indexes and return it
220
+ # Form a new Vector using positional indexes
213
221
  Daru::Vector.new(
214
- indexes.map { |loc| @data[@index[loc]] },
215
- name: @name, metadata: @metadata.dup, index: indexes.conform(input_indexes), dtype: @dtype
222
+ positions.map { |loc| @data[loc] },
223
+ name: @name,
224
+ index: @index.subset(*input_indexes), dtype: @dtype
216
225
  )
217
226
  end
218
227
 
228
+ # Returns vector of values given positional values
229
+ # @param [Array<object>] *positions positional values
230
+ # @return [object] vector
231
+ # @example
232
+ # dv = Daru::Vector.new 'a'..'e'
233
+ # dv.at 0, 1, 2
234
+ # # => #<Daru::Vector(3)>
235
+ # # 0 a
236
+ # # 1 b
237
+ # # 2 c
238
+ def at *positions
239
+ # to be used to form index
240
+ original_positions = positions
241
+ positions = coerce_positions(*positions)
242
+ validate_positions(*positions)
243
+
244
+ if positions.is_a? Integer
245
+ @data[positions]
246
+ else
247
+ values = positions.map { |pos| @data[pos] }
248
+ Daru::Vector.new values, index: @index.at(*original_positions), dtype: dtype
249
+ end
250
+ end
251
+
252
+ # Change value at given positions
253
+ # @param [Array<object>] *positions positional values
254
+ # @param [object] val value to assign
255
+ # @example
256
+ # dv = Daru::Vector.new 'a'..'e'
257
+ # dv.set_at [0, 1], 'x'
258
+ # dv
259
+ # # => #<Daru::Vector(5)>
260
+ # # 0 x
261
+ # # 1 x
262
+ # # 2 c
263
+ # # 3 d
264
+ # # 4 e
265
+ def set_at positions, val
266
+ validate_positions(*positions)
267
+ positions.map { |pos| @data[pos] = val }
268
+ update_position_cache
269
+ end
270
+
219
271
  # Just like in Hashes, you can specify the index label of the Daru::Vector
220
272
  # and assign an element at that place in the Daru::Vector.
221
273
  #
@@ -229,57 +281,14 @@ module Daru
229
281
  # # a 999
230
282
  # # b 2
231
283
  # # c 3
232
- def []=(*location, value)
233
- cast(dtype: :array) if value.nil? && dtype != :array
234
-
235
- @possibly_changed_type = true if @type == :object && (value.nil? ||
236
- value.is_a?(Numeric))
237
- @possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
238
- !value.nil?)
239
-
240
- pos = @index[*location]
241
-
242
- if pos.is_a?(Numeric)
243
- @data[pos] = value
244
- else
245
- begin
246
- pos.each { |tuple| self[tuple] = value }
247
- rescue NoMethodError
248
- raise IndexError, "Specified index #{pos.inspect} does not exist."
249
- end
250
- end
251
-
252
- set_size
253
- set_missing_positions unless Daru.lazy_update
254
- end
284
+ def []=(*indexes, val)
285
+ cast(dtype: :array) if val.nil? && dtype != :array
255
286
 
256
- # The values to be treated as 'missing'. *nil* is the default missing
257
- # type. To set missing values see the missing_values= method.
258
- def missing_values
259
- @missing_values.keys
260
- end
287
+ guard_type_check(val)
261
288
 
262
- # Assign an Array to treat certain values as 'missing'.
263
- #
264
- # == Usage
265
- #
266
- # v = Daru::Vector.new [1,2,3,4,5]
267
- # v.missing_values = [3]
268
- # v.update
269
- # v.missing_positions
270
- # #=> [2]
271
- def missing_values= values
272
- set_missing_values values
273
- set_missing_positions unless Daru.lazy_update
274
- end
289
+ modify_vector(indexes, val)
275
290
 
276
- # Method for updating the metadata (i.e. missing value positions) of the
277
- # after assingment/deletion etc. are complete. This is provided so that
278
- # time is not wasted in creating the metadata for the vector each time
279
- # assignment/deletion of elements is done. Updating data this way is called
280
- # lazy loading. To set or unset lazy loading, see the .lazy_update= method.
281
- def update
282
- Daru.lazy_update and set_missing_positions
291
+ update_position_cache
283
292
  end
284
293
 
285
294
  # Two vectors are equal if they have the exact same index values corresponding
@@ -287,7 +296,7 @@ module Daru
287
296
  def == other
288
297
  case other
289
298
  when Daru::Vector
290
- @index == other.index && @size == other.size &&
299
+ @index == other.index && size == other.size &&
291
300
  @index.all? { |index| self[index] == other[index] }
292
301
  else
293
302
  super
@@ -405,8 +414,8 @@ module Daru
405
414
  # # 11 5
406
415
  # # 13 5
407
416
  # # 15 1
408
- def where bool_arry
409
- Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, dtype
417
+ def where bool_array
418
+ Daru::Core::Query.vector_where self, bool_array
410
419
  end
411
420
 
412
421
  def head q=10
@@ -414,18 +423,41 @@ module Daru
414
423
  end
415
424
 
416
425
  def tail q=10
417
- self[(@size - q)..(@size-1)]
426
+ start = [size - q, 0].max
427
+ self[start..(size-1)]
418
428
  end
419
429
 
420
430
  def empty?
421
431
  @index.empty?
422
432
  end
423
433
 
434
+ def numeric?
435
+ type == :numeric
436
+ end
437
+
438
+ def object?
439
+ type == :object
440
+ end
441
+
424
442
  # Reports whether missing data is present in the Vector.
425
443
  def has_missing_data?
426
- !missing_positions.empty?
444
+ !indexes(*Daru::MISSING_VALUES).empty?
427
445
  end
428
446
  alias :flawed? :has_missing_data?
447
+ deprecate :has_missing_data?, :include_values?, 2016, 10
448
+ deprecate :flawed?, :include_values?, 2016, 10
449
+
450
+ # Check if any one of mentioned values occur in the vector
451
+ # @param [Array] *values values to check for
452
+ # @return [true, false] returns true if any one of specified values
453
+ # occur in the vector
454
+ # @example
455
+ # dv = Daru::Vector.new [1, 2, 3, 4, nil]
456
+ # dv.include_values? nil, Float::NAN
457
+ # # => true
458
+ def include_values?(*values)
459
+ values.any? { |v| include_with_nan? @data, v }
460
+ end
429
461
 
430
462
  # Append an element to the vector by specifying the element and index
431
463
  def concat element, index
@@ -434,8 +466,7 @@ module Daru
434
466
  @index |= [index]
435
467
  @data[@index[index]] = element
436
468
 
437
- set_size
438
- set_missing_positions unless Daru.lazy_update
469
+ update_position_cache
439
470
  end
440
471
  alias :push :concat
441
472
  alias :<< :concat
@@ -463,8 +494,7 @@ module Daru
463
494
  @data.delete_at @index[index]
464
495
  @index = Daru::Index.new(@index.to_a - [index])
465
496
 
466
- set_size
467
- set_missing_positions unless Daru.lazy_update
497
+ update_position_cache
468
498
  end
469
499
 
470
500
  # The type of data contained in the vector. Can be :object or :numeric. If
@@ -489,6 +519,16 @@ module Daru
489
519
  @type
490
520
  end
491
521
 
522
+ # Tells if vector is categorical or not.
523
+ # @return [true, false] true if vector is of type category, false otherwise
524
+ # @example
525
+ # dv = Daru::Vector.new [1, 2, 3], type: :category
526
+ # dv.category?
527
+ # # => true
528
+ def category?
529
+ type == :category
530
+ end
531
+
492
532
  # Get index of element
493
533
  def index_of element
494
534
  case dtype
@@ -500,11 +540,9 @@ module Daru
500
540
  # Keep only unique elements of the vector along with their indexes.
501
541
  def uniq
502
542
  uniq_vector = @data.uniq
503
- new_index = uniq_vector.each_with_object([]) do |element, acc|
504
- acc << index_of(element)
505
- end
543
+ new_index = uniq_vector.map { |element| index_of(element) }
506
544
 
507
- Daru::Vector.new uniq_vector, name: @name, metadata: @metadata.dup, index: new_index, dtype: @dtype
545
+ Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
508
546
  end
509
547
 
510
548
  def any? &block
@@ -531,47 +569,46 @@ module Daru
531
569
  # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
532
570
  # # Say you want to sort these strings by length.
533
571
  # v.sort(ascending: false) { |a,b| a.length <=> b.length }
534
- def sort opts={}
535
- opts = {
536
- ascending: true
537
- }.merge(opts)
538
-
539
- vector_index = @data.each_with_index
540
- vector_index =
541
- if block_given?
542
- vector_index.sort { |a,b| yield(a[0], b[0]) }
543
- else
544
- vector_index.sort { |(av, ai), (bv, bi)|
545
- if !av.nil? && !bv.nil?
546
- av <=> bv
547
- elsif av.nil? && bv.nil?
548
- ai <=> bi
549
- elsif av.nil?
550
- opts[:ascending] ? -1 : 1
551
- else
552
- opts[:ascending] ? 1 : -1
553
- end
554
- }
555
- end
556
- vector_index.reverse! unless opts[:ascending]
572
+ def sort opts={}, &block
573
+ opts = {ascending: true}.merge(opts)
574
+
575
+ vector_index = resort_index(@data.each_with_index, opts, &block)
557
576
  vector, index = vector_index.transpose
558
- old_index = @index.to_a
559
- index = index.map { |i| old_index[i] }
560
577
 
561
- Daru::Vector.new(vector, index: index, name: @name, metadata: @metadata.dup, dtype: @dtype)
578
+ index = @index.reorder index
579
+
580
+ Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
581
+ end
582
+
583
+ DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
584
+ case
585
+ when lv.nil? && rv.nil?
586
+ li <=> ri
587
+ when lv.nil?
588
+ -1
589
+ when rv.nil?
590
+ 1
591
+ else
592
+ lv <=> rv
593
+ end
594
+ }
595
+
596
+ def resort_index vector_index, opts
597
+ if block_given?
598
+ vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
599
+ else
600
+ vector_index.sort(&DEFAULT_SORTER)
601
+ end
602
+ .tap { |res| res.reverse! unless opts[:ascending] }
562
603
  end
563
604
 
564
605
  # Just sort the data and get an Array in return using Enumerable#sort.
565
606
  # Non-destructive.
607
+ # :nocov:
566
608
  def sorted_data &block
567
609
  @data.to_a.sort(&block)
568
610
  end
569
-
570
- # Returns *true* if the value passed is actually exists or is not marked as
571
- # a *missing value*.
572
- def exists? value
573
- !@missing_values.key?(self[index_of(value)])
574
- end
611
+ # :nocov:
575
612
 
576
613
  # Like map, but returns a Daru::Vector with the returned values.
577
614
  def recode dt=nil, &block
@@ -593,19 +630,12 @@ module Daru
593
630
  def delete_if
594
631
  return to_enum(:delete_if) unless block_given?
595
632
 
596
- keep_e = []
597
- keep_i = []
598
- each_with_index do |n, i|
599
- unless yield(n)
600
- keep_e << n
601
- keep_i << i
602
- end
603
- end
633
+ keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
604
634
 
605
635
  @data = cast_vector_to @dtype, keep_e
606
636
  @index = Daru::Index.new(keep_i)
607
- set_missing_positions unless Daru.lazy_update
608
- set_size
637
+
638
+ update_position_cache
609
639
 
610
640
  self
611
641
  end
@@ -614,32 +644,16 @@ module Daru
614
644
  def keep_if
615
645
  return to_enum(:keep_if) unless block_given?
616
646
 
617
- keep_e = []
618
- keep_i = []
619
- each_with_index do |n, i|
620
- if yield(n)
621
- keep_e << n
622
- keep_i << i
623
- end
624
- end
625
-
626
- @data = cast_vector_to @dtype, keep_e
627
- @index = Daru::Index.new(keep_i)
628
- set_missing_positions unless Daru.lazy_update
629
- set_size
630
-
631
- self
647
+ delete_if { |val| !yield(val) }
632
648
  end
633
649
 
634
650
  # Reports all values that don't comply with a condition.
635
651
  # Returns a hash with the index of data and the invalid data.
636
652
  def verify
637
- h = {}
638
- (0...size).each do |i|
639
- h[i] = @data[i] unless yield(@data[i])
640
- end
641
-
642
- h
653
+ (0...size)
654
+ .map { |i| [i, @data[i]] }
655
+ .reject { |_i, val| yield(val) }
656
+ .to_h
643
657
  end
644
658
 
645
659
  # Return an Array with the data split by a separator.
@@ -674,29 +688,19 @@ module Daru
674
688
  #
675
689
  def split_by_separator sep=','
676
690
  split_data = splitted sep
677
- factors = split_data.flatten.uniq.compact
678
-
679
- out = factors.map { |x| [x, []] }.to_h
680
-
681
- split_data.each do |r|
682
- if r.nil?
683
- factors.each do |f|
684
- out[f].push(nil)
685
- end
686
- else
687
- factors.each do |f|
688
- out[f].push(r.include?(f) ? 1 : 0)
689
- end
690
- end
691
- end
692
-
693
- out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
691
+ split_data
692
+ .flatten.uniq.compact.map do |key|
693
+ [
694
+ key,
695
+ Daru::Vector.new(split_data.map { |v| split_value(key, v) })
696
+ ]
697
+ end.to_h
694
698
  end
695
699
 
696
700
  def split_by_separator_freq(sep=',')
697
- split_by_separator(sep).map do |k, v|
698
- [k, v.inject { |s,x| s+x.to_i }]
699
- end.to_h
701
+ split_by_separator(sep).map { |k, v|
702
+ [k, v.map(&:to_i).inject(:+)]
703
+ }.to_h
700
704
  end
701
705
 
702
706
  def reset_index!
@@ -718,23 +722,15 @@ module Daru
718
722
  # # 1 false
719
723
  # # 2 false
720
724
  # # 3 true
725
+ #
721
726
  def is_nil?
722
- nil_truth_vector = clone_structure
723
- @index.each do |idx|
724
- nil_truth_vector[idx] = self[idx].nil? ? true : false
725
- end
726
-
727
- nil_truth_vector
727
+ # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
728
+ recode(&:nil?)
728
729
  end
729
730
 
730
731
  # Opposite of #is_nil?
731
732
  def not_nil?
732
- nil_truth_vector = clone_structure
733
- @index.each do |idx|
734
- nil_truth_vector[idx] = self[idx].nil? ? false : true
735
- end
736
-
737
- nil_truth_vector
733
+ recode { |v| !v.nil? }
738
734
  end
739
735
 
740
736
  # Replace all nils in the vector with the value passed as an argument. Destructive.
@@ -744,7 +740,7 @@ module Daru
744
740
  #
745
741
  # * +replacement+ - The value which should replace all nils
746
742
  def replace_nils! replacement
747
- missing_positions.each do |idx|
743
+ indexes(*Daru::MISSING_VALUES).each do |idx|
748
744
  self[idx] = replacement
749
745
  end
750
746
 
@@ -765,13 +761,13 @@ module Daru
765
761
  # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
766
762
  # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
767
763
  def lag k=1
768
- return dup if k == 0
764
+ return dup if k.zero?
769
765
 
770
766
  dat = @data.to_a.dup
771
767
  (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
772
768
  (0...k).each { |i| dat[i] = nil }
773
769
 
774
- Daru::Vector.new(dat, index: @index, name: @name, metadata: @metadata.dup)
770
+ Daru::Vector.new(dat, index: @index, name: @name)
775
771
  end
776
772
 
777
773
  def detach_index
@@ -788,7 +784,19 @@ module Daru
788
784
 
789
785
  # number of non-missing elements
790
786
  def n_valid
791
- @size - missing_positions.size
787
+ size - indexes(*Daru::MISSING_VALUES).size
788
+ end
789
+ deprecate :n_valid, :count_values, 2016, 10
790
+
791
+ # Count the number of values specified
792
+ # @param [Array] *values values to count for
793
+ # @return [Integer] the number of times the values mentioned occurs
794
+ # @example
795
+ # dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
796
+ # dv.count_values nil
797
+ # # => 2
798
+ def count_values(*values)
799
+ positions(*values).size
792
800
  end
793
801
 
794
802
  # Returns *true* if an index exists
@@ -796,6 +804,11 @@ module Daru
796
804
  @index.include? index
797
805
  end
798
806
 
807
+ # @return [Daru::DataFrame] the vector as a single-vector dataframe
808
+ def to_df
809
+ Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
810
+ end
811
+
799
812
  # Convert Vector to a horizontal or vertical Ruby Matrix.
800
813
  #
801
814
  # == Arguments
@@ -811,11 +824,39 @@ module Daru
811
824
  end
812
825
  end
813
826
 
827
+ # Convert vector to nmatrix object
828
+ # @param [Symbol] axis :horizontal or :vertical
829
+ # @return [NMatrix] NMatrix object containing all values of the vector
830
+ # @example
831
+ # dv = Daru::Vector.new [1, 2, 3]
832
+ # dv.to_nmatrix
833
+ # # =>
834
+ # # [
835
+ # # [1, 2, 3] ]
836
+ def to_nmatrix axis=:horizontal
837
+ raise ArgumentError, 'Can not convert to nmatrix'\
838
+ 'because the vector is numeric' unless numeric? && !include?(nil)
839
+
840
+ case axis
841
+ when :horizontal
842
+ NMatrix.new [1, size], to_a
843
+ when :vertical
844
+ NMatrix.new [size, 1], to_a
845
+ else
846
+ raise ArgumentError, 'Invalid axis specified. '\
847
+ 'Valid axis are :horizontal and :vertical'
848
+ end
849
+ end
850
+
814
851
  # If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
815
852
  # the stored GSL::Vector object.
816
853
  def to_gsl
817
854
  raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
818
- dtype == :gsl ? @data.data : GSL::Vector.alloc(only_valid(:array).to_a)
855
+ if dtype == :gsl
856
+ @data.data
857
+ else
858
+ GSL::Vector.alloc(reject_values(*Daru::MISSING_VALUES).to_a)
859
+ end
819
860
  end
820
861
 
821
862
  # Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
@@ -835,30 +876,12 @@ module Daru
835
876
 
836
877
  # Convert to html for iruby
837
878
  def to_html threshold=30
838
- name = @name || 'nil'
839
- html = '<table>' \
840
- '<tr>' \
841
- '<th colspan="2">' \
842
- "Daru::Vector:#{object_id} " + " size: #{size}" \
843
- '</th>' \
844
- '</tr>'
845
- html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
846
- @index.each_with_index do |index, num|
847
- html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
848
-
849
- next if num <= threshold
850
- html += '<tr><td>...</td><td>...</td></tr>'
851
-
852
- last_index = @index.to_a.last
853
- html += '<tr>' \
854
- '<td>' + last_index.to_s + '</td>' \
855
- '<td>' + self[last_index].to_s + '</td>' \
856
- '</tr>'
857
- break
858
- end
859
- html += '</table>'
860
-
861
- html
879
+ path = if index.is_a?(MultiIndex)
880
+ File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
881
+ else
882
+ File.expand_path('../iruby/templates/vector.html.erb', __FILE__)
883
+ end
884
+ ERB.new(File.read(path).strip).result(binding)
862
885
  end
863
886
 
864
887
  def to_s
@@ -870,10 +893,11 @@ module Daru
870
893
  ReportBuilder.new(no_title: true).add(self).send(method)
871
894
  end
872
895
 
873
- def report_building b
896
+ # :nocov:
897
+ def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
874
898
  b.section(name: name) do |s|
875
899
  s.text "n :#{size}"
876
- s.text "n valid:#{n_valid}"
900
+ s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
877
901
  if @type == :object
878
902
  s.text "factors: #{factors.to_a.join(',')}"
879
903
  s.text "mode: #{mode}"
@@ -881,7 +905,7 @@ module Daru
881
905
  s.table(name: 'Distribution') do |t|
882
906
  frequencies.sort_by(&:to_s).each do |k,v|
883
907
  key = @index.include?(k) ? @index[k] : k
884
- t.row [key, v, ('%0.2f%%' % (v.quo(n_valid)*100))]
908
+ t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
885
909
  end
886
910
  end
887
911
  end
@@ -898,47 +922,71 @@ module Daru
898
922
  end
899
923
  end
900
924
  end
925
+ # :nocov:
901
926
 
902
927
  # Overrides original inspect for pretty printing in irb
903
928
  def inspect spacing=20, threshold=15
904
- longest =
905
- [
906
- @name.to_s.size,
907
- (@index.to_a.map(&:to_s).map(&:size).max || 0),
908
- (@data.map(&:to_s).map(&:size).max || 0),
909
- 3 # 'nil'.size
910
- ].max
911
-
912
- content = ''
913
- longest = spacing if longest > spacing
914
- name = @name || 'nil'
915
- metadata = @metadata || 'nil'
916
- formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"
917
- content += "\n#<#{self.class}:#{object_id} @name = #{name} @metadata = #{metadata} @size = #{size} >"
918
-
919
- content += formatter % ['', name]
920
- @index.each_with_index do |index, num|
921
- content += formatter % [index.to_s, (self[*index] || 'nil').to_s]
922
- if num > threshold
923
- content += formatter % ['...', '...']
924
- break
925
- end
929
+ row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
930
+
931
+ "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
932
+ Formatters::Table.format(
933
+ to_a.lazy.map { |v| [v] },
934
+ headers: @name && [@name],
935
+ row_headers: row_headers,
936
+ threshold: threshold,
937
+ spacing: spacing
938
+ )
939
+ end
940
+
941
+ # Sets new index for vector. Preserves index->value correspondence.
942
+ # Sets nil for new index keys absent from original index.
943
+ # @note Unlike #reorder! which takes positions as input it takes
944
+ # index as an input to reorder the vector
945
+ # @param [Daru::Index, Daru::MultiIndex] new_index new index to order with
946
+ # @return [Daru::Vector] vector reindexed with new index
947
+ def reindex! new_index
948
+ values = []
949
+ each_with_index do |val, i|
950
+ values[new_index[i]] = val if new_index.include?(i)
926
951
  end
927
- content += "\n"
952
+ values.fill(nil, values.size, new_index.size - values.size)
953
+
954
+ @data = cast_vector_to @dtype, values
955
+ @index = new_index
928
956
 
929
- content
957
+ update_position_cache
958
+
959
+ self
960
+ end
961
+
962
# Reorder the vector in place using the given positions.
# @note Unlike #reindex!, which takes index labels as input, this method
#   takes integer positions as an input to reorder the vector.
# @param [Array] order positions, listed in the order the elements
#   should appear in the result
# @return [Daru::Vector] the vector itself, reordered
# @example
#   dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
#   dv.reorder! [2, 1, 0]
#   # => #<Daru::Vector(3)>
#   #   a   1
#   #   b   2
#   #   c   3
def reorder! order
  # Read the values out before any state is replaced.
  reordered = order.map { |i| @data[i] }

  @index = @index.reorder order
  # Go through cast_vector_to (as #reindex! does) so that @data remains a
  # dtype accessor instead of becoming a bare Array.
  @data = cast_vector_to @dtype, reordered
  update_position_cache
  self
end
980
+
981
# Non-destructive version of #reorder!: the receiver is left untouched and
# a reordered duplicate is returned.
# @param [Array] order positions to reorder with
# @return the result of calling #reorder! on the duplicate
def reorder order
  copy = dup
  copy.reorder!(order)
end
931
985
 
932
986
  # Create a new vector with a different index, and preserve the indexing of
933
987
  # current elements.
934
988
  def reindex new_index
935
- vector = Daru::Vector.new([], index: new_index, name: @name, metadata: @metadata.dup)
936
-
937
- new_index.each do |idx|
938
- vector[idx] = @index.include?(idx) ? self[idx] : nil
939
- end
940
-
941
- vector
989
+ dup.reindex!(new_index)
942
990
  end
943
991
 
944
992
  def index= idx
@@ -956,17 +1004,16 @@ module Daru
956
1004
  #
957
1005
  # @param new_name [Symbol] The new name.
958
1006
  def rename new_name
959
- if new_name.is_a?(Numeric)
960
- @name = new_name
961
- return
962
- end
963
-
964
1007
  @name = new_name
1008
+ self
965
1009
  end
966
1010
 
967
- # Duplicate elements and indexes
1011
+ alias_method :name=, :rename
1012
+
1013
+ # Duplicates a vector
1014
+ # @return [Daru::Vector] duplicated vector
968
1015
  def dup
969
- Daru::Vector.new @data.dup, name: @name, metadata: @metadata.dup, index: @index.dup
1016
+ Daru::Vector.new @data.dup, name: @name, index: @index.dup
970
1017
  end
971
1018
 
972
1019
  # == Bootstrap
@@ -1019,8 +1066,8 @@ module Daru
1019
1066
  #
1020
1067
  # == Reference:
1021
1068
  # * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife.
1022
- def jackknife(estimators, k=1)
1023
- raise "n should be divisible by k:#{k}" unless size % k==0
1069
+ def jackknife(estimators, k=1) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
1070
+ raise "n should be divisible by k:#{k}" unless (size % k).zero?
1024
1071
 
1025
1072
  nb = (size / k).to_i
1026
1073
  h_est, es, ps = prepare_bootstrap(estimators)
@@ -1057,62 +1104,123 @@ module Daru
1057
1104
  # vector, setting this to false will return the same vector.
1058
1105
  # Otherwise, a duplicate will be returned irrespective of
1059
1106
  # presence of missing data.
1060
- def only_valid as_a=:vector, duplicate=true
1061
- return dup if !has_missing_data? && as_a == :vector && duplicate
1062
- return self if !has_missing_data? && as_a == :vector && !duplicate
1063
- return to_a if !has_missing_data? && as_a != :vector
1064
-
1065
- new_index = @index.to_a - missing_positions
1066
- new_vector = new_index.map do |idx|
1067
- self[idx]
1068
- end
1069
1107
 
1070
- return new_vector if as_a != :vector
1108
+ def only_valid as_a=:vector, _duplicate=true
1109
+ # FIXME: Now duplicate is just ignored.
1110
+ # There are no spec that fail on this case, so I'll leave it
1111
+ # this way for now - zverok, 2016-05-07
1071
1112
 
1072
- Daru::Vector.new new_vector, index: new_index, name: @name, metadata: @metadata.dup, dtype: dtype
1113
+ new_index = @index.to_a - indexes(*Daru::MISSING_VALUES)
1114
+ new_vector = new_index.map { |idx| self[idx] }
1115
+
1116
+ if as_a == :vector
1117
+ Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
1118
+ else
1119
+ new_vector
1120
+ end
1121
+ end
1122
+ deprecate :only_valid, :reject_values, 2016, 10
1123
+
1124
# Return a vector with specified values removed
# @param [Array] *values values to reject from resultant vector
# @return [Daru::Vector] vector with specified values removed
# @example
#   dv = Daru::Vector.new [1, 2, nil, Float::NAN]
#   dv.reject_values nil, Float::NAN
#   # => #<Daru::Vector(2)>
#   #   0   1
#   #   1   2
def reject_values(*values)
  resultant_pos = size.times.to_a - positions(*values)

  # #at hands back a bare element (not a vector) when given one integer
  # position, so a single survivor is requested as a one-element range.
  # Deciding on the number of positions, rather than on the class of what
  # #at returned, stays correct even if that element is itself a vector.
  if resultant_pos.size == 1
    pos = resultant_pos.first
    at(pos..pos)
  else
    at(*resultant_pos)
  end
end
1145
+
1146
# Look up the index labels at which any of the given values occur.
# @param [Array] *values values to find indexes for
# @return [Array] index labels of the matching elements
# @example
#   dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
#   dv.indexes nil, Float::NAN
#   # => [13, 14]
def indexes(*values)
  labels = index.to_a
  positions(*values).map { |pos| labels[pos] }
end
1156
+
1157
# Replaces every occurrence of the specified values with a new value.
# @param [Array] old_values values to replace; a single non-Array value
#   is treated as a one-element list
# @param [object] new_value new value to replace with
# @note The replacement is performed in place.
# @return [Daru::Vector] the same vector, with values replaced
# @example
#   dv = Daru::Vector.new [1, 2, :a, :b]
#   dv.replace_values [:a, :b], nil
#   dv
#   # => #<Daru::Vector(4)>
#   #   0   1
#   #   1   2
#   #   2 nil
#   #   3 nil
def replace_values(old_values, new_value)
  targets = old_values.is_a?(Array) ? old_values : [old_values]

  (0...size).each do |pos|
    # include_with_nan? lets Float::NAN in old_values match NaN elements.
    next unless include_with_nan?(targets, at(pos))
    set_at([pos], new_value)
  end

  self
end
1074
1181
 
1075
1182
  # Returns a Vector containing only missing data (preserves indexes).
1076
1183
  def only_missing as_a=:vector
1077
1184
  if as_a == :vector
1078
- self[*missing_positions]
1185
+ self[*indexes(*Daru::MISSING_VALUES)]
1079
1186
  elsif as_a == :array
1080
- self[*missing_positions].to_a
1187
+ self[*indexes(*Daru::MISSING_VALUES)].to_a
1081
1188
  end
1082
1189
  end
1190
+ deprecate :only_missing, nil, 2016, 10
1083
1191
 
1084
1192
  # Returns a Vector with only numerical data. Missing data is included
1085
1193
  # but non-Numeric objects are excluded. Preserves index.
1086
1194
  def only_numerics
1087
- numeric_indexes = []
1088
-
1089
- each_with_index do |v, i|
1090
- numeric_indexes << i if v.is_a?(Numeric) || @missing_values.key?(v)
1091
- end
1195
+ numeric_indexes =
1196
+ each_with_index
1197
+ .select { |v, _i| v.is_a?(Numeric) || v.nil? }
1198
+ .map(&:last)
1092
1199
 
1093
1200
  self[*numeric_indexes]
1094
1201
  end
1095
1202
 
1203
+ DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/
1204
+
1096
1205
  # Returns the database type for the vector, according to its content
1097
1206
  def db_type
1098
1207
  # first, detect any character not number
1099
- if @data.find { |v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/ } ||
1100
- @data.find { |v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/ }
1101
-
1102
- return 'DATE'
1103
- elsif @data.find { |v| v.to_s=~/[^0-9e.-]/ }
1104
- return 'VARCHAR (255)'
1105
- elsif @data.find { |v| v.to_s=~/\./ }
1106
- return 'DOUBLE'
1208
+ case
1209
+ when @data.any? { |v| v.to_s =~ DATE_REGEXP }
1210
+ 'DATE'
1211
+ when @data.any? { |v| v.to_s =~ /[^0-9e.-]/ }
1212
+ 'VARCHAR (255)'
1213
+ when @data.any? { |v| v.to_s =~ /\./ }
1214
+ 'DOUBLE'
1107
1215
  else
1108
- return 'INTEGER'
1216
+ 'INTEGER'
1109
1217
  end
1110
1218
  end
1111
1219
 
1112
1220
  # Copies the structure of the vector (i.e the index, size, etc.) and fills all
1113
1221
  # values with nils.
1114
1222
  def clone_structure
1115
- Daru::Vector.new(([nil]*@size), name: @name, metadata: @metadata.dup, index: @index.dup)
1223
+ Daru::Vector.new(([nil]*size), name: @name, index: @index.dup)
1116
1224
  end
1117
1225
 
1118
1226
  # Save the vector to a file
@@ -1129,38 +1237,156 @@ module Daru
1129
1237
  data: @data.to_a,
1130
1238
  dtype: @dtype,
1131
1239
  name: @name,
1132
- metadata: @metadata,
1133
- index: @index,
1134
- missing_values: @missing_values
1240
+ index: @index
1135
1241
  )
1136
1242
  end
1137
1243
 
1138
- def self._load(data) # :nodoc:
1139
- h = Marshal.load(data)
1140
- Daru::Vector.new(h[:data],
1141
- index: h[:index],
1142
- name: h[:name], metadata: h[:metadata],
1143
- dtype: h[:dtype], missing_values: h[:missing_values])
1144
- end
1145
-
1244
+ # :nocov:
1146
1245
  def daru_vector(*)
1147
1246
  self
1148
1247
  end
1248
+ # :nocov:
1149
1249
 
1150
1250
  alias :dv :daru_vector
1151
1251
 
1252
# Builds a category-type vector from this vector's values, name and index.
# @param [Hash] opts options to convert to category
# @option opts [true, false] :ordered Specify if vector is ordered or not.
#   If it is ordered, it can be sorted and min, max like functions would work.
#   Defaults to false.
# @option opts [Array] :categories set categories in the specified order
# @return [Daru::Vector] vector with type category
def to_category opts={}
  Daru::Vector.new(to_a, type: :category, name: @name, index: @index).tap do |cat|
    cat.ordered = opts[:ordered] || false
    cat.categories = opts[:categories] if opts[:categories]
  end
end
1264
+
1152
1265
  def method_missing(name, *args, &block)
1266
+ # FIXME: it is shamefully fragile. Should be either made stronger
1267
+ # (string/symbol dichotomy, informative errors) or removed totally. - zverok
1153
1268
  if name =~ /(.+)\=/
1154
- self[name] = args[0]
1269
+ self[$1.to_sym] = args[0]
1155
1270
  elsif has_index?(name)
1156
1271
  self[name]
1157
1272
  else
1158
- super(name, *args, &block)
1273
+ super
1274
+ end
1275
+ end
1276
+
1277
# Companion to #method_missing: reports setter-style names (ending in '=')
# and names that are present in the index as handled, and defers to the
# default implementation for everything else.
def respond_to_missing?(name, include_private=false)
  return true if name.to_s.end_with?('=')

  has_index?(name) || super
end
1280
+
1281
# Partition a numeric variable into categories.
# @param [Array<Numeric>] partitions an array whose consecutive elements
#   provide intervals for categories
# @param [Hash] opts options to cut the partition
# @option opts [:left, :right] :close_at specifies whether the interval
#   closes at the right side or the left side (defaults to :right)
# @option opts [Array] :labels names of the categories
# @return [Daru::Vector] numeric variable converted to categorical variable
# @example
#   heights = Daru::Vector.new [30, 35, 32, 50, 42, 51]
#   height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
#   # => #<Daru::Vector(6)>
#   #   0    low
#   #   1    low
#   #   2    low
#   #   3   high
#   #   4 medium
#   #   5   high
def cut partitions, opts={}
  close_at = opts[:close_at] || :right
  labels = opts[:labels]
  partitions = partitions.to_a

  values = to_a.map { |val| cut_find_category partitions, val, close_at }
  cats = cut_categories(partitions, close_at)

  dv = Daru::Vector.new values, index: @index, type: :category, categories: cats
  return dv unless labels

  # Rename the generated interval categories to the caller's labels.
  dv.rename_categories cats.zip(labels).to_h
end
1317
+
1318
# Integer positions of the elements matching any of the given values.
# The missing-value lookups (nil and/or Float::NAN) are served from the
# memoized position lists; any other combination scans the data.
# @param [Array] *values values to look for
# @return [Array<Integer>] positions of matching elements
def positions(*values)
  return nil_positions if [nil] == values
  return nan_positions if [Float::NAN] == values

  if [nil, Float::NAN] == values || [Float::NAN, nil] == values
    return nil_positions + nan_positions
  end

  (0...size).select { |pos| include_with_nan?(values, @data[pos]) }
end
1161
1330
 
1162
1331
  private
1163
1332
 
1333
# Positions holding nil. Memoized in @nil_positions, which
# #update_position_cache clears.
def nil_positions
  @nil_positions ||= (0...size).select { |pos| @data[pos].nil? }
end
1337
+
1338
# Positions holding a NaN, i.e. elements that respond to #nan? and answer
# true. Memoized in @nan_positions, which #update_position_cache clears.
def nan_positions
  @nan_positions ||= (0...size).select do |pos|
    value = @data[pos]
    value.respond_to?(:nan?) && value.nan?
  end
end
1344
+
1345
# Sets up name, data, index and plotting for a new vector.
# @param source the values; a Hash supplies both index (keys) and data
#   (values), see #parse_source
# @param [Hash] opts reads :name, :dtype (defaults to :array), :nm_dtype
#   and, for non-Hash sources, :index
def initialize_vector source, opts
  index, source = parse_source(source, opts)
  set_name opts[:name]

  dtype = opts[:dtype] || :array
  @data = cast_vector_to(dtype, source, opts[:nm_dtype])
  # With no index given, coerce from the data size instead.
  @index = Index.coerce(index || @data.size)

  guard_sizes!

  @possibly_changed_type = true
  # Include plotting functionality
  self.plotting_library = Daru.plotting_library
end
1358
+
1359
# Splits the constructor input into an [index, data] pair.
# A Hash source yields its keys as index and its values as data; anything
# else takes the index from opts[:index] and falls back to an empty Array
# when the source is nil.
def parse_source source, opts
  return [source.keys, source.values] if source.is_a?(Hash)

  [opts[:index], source || []]
end
1366
+
1367
# Reconciles index and data sizes: an index shorter than the data is an
# error, an index longer than the data pads the data with nils.
# @raise [IndexError] when the index has fewer entries than the data
def guard_sizes!
  index_size = @index.size
  data_size = @data.size

  if index_size < data_size
    raise IndexError, "Expected index size >= vector size. Index size : #{index_size}, vector size : #{data_size}"
  end
  return unless index_size > data_size

  cast(dtype: :array) # NM with nils seg faults
  @data.fill(nil, @data.size...@index.size)
end
1375
+
1376
# Sets @possibly_changed_type when the incoming value could alter the
# vector's type: an object vector receiving nil or a Numeric, or a numeric
# vector receiving something that is neither.
def guard_type_check value
  numeric_like = value.nil? || value.is_a?(Numeric)
  type_may_change = (object? && numeric_like) || (numeric? && !numeric_like)

  @possibly_changed_type = true if type_may_change
end
1381
+
1382
# Membership indicator for +key+ in +v+: nil when v is nil, 1 when v
# includes key, 0 otherwise.
def split_value key, v
  return nil if v.nil?

  v.include?(key) ? 1 : 0
end
1389
+
1164
1390
  # For an array or hash of estimators methods, returns
1165
1391
  # an array with three elements
1166
1392
  # 1.- A hash with estimators names as keys and lambdas as values
@@ -1180,18 +1406,6 @@ module Daru
1180
1406
  [h_est, h_est.keys, bss]
1181
1407
  end
1182
1408
 
1183
- def keep? a, b, order
1184
- eval = yield(a, b)
1185
- if order == :ascending
1186
- return true if eval == -1
1187
- return false if eval == 1
1188
- elsif order == :descending
1189
- return false if eval == -1
1190
- return true if eval == 1
1191
- end
1192
- false
1193
- end
1194
-
1195
1409
  # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
1196
1410
  # @dtype variable is set and the underlying data type of vector changed.
1197
1411
  def cast_vector_to dtype, source=nil, nm_dtype=nil
@@ -1203,25 +1417,13 @@ module Daru
1203
1417
  when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
1204
1418
  when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
1205
1419
  when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
1206
- else raise "Unknown dtype #{dtype}"
1420
+ else raise ArgumentError, "Unknown dtype #{dtype}"
1207
1421
  end
1208
1422
 
1209
1423
  @dtype = dtype || :array
1210
1424
  new_vector
1211
1425
  end
1212
1426
 
1213
- def index_for index
1214
- if @index.include?(index)
1215
- @index[index]
1216
- elsif index.is_a?(Numeric)
1217
- index
1218
- end
1219
- end
1220
-
1221
- def set_size
1222
- @size = @data.size
1223
- end
1224
-
1225
1427
  def set_name name # rubocop:disable Style/AccessorMethodName
1226
1428
  @name =
1227
1429
  if name.is_a?(Numeric) then name
@@ -1232,38 +1434,109 @@ module Daru
1232
1434
  end
1233
1435
  end
1234
1436
 
1235
- def set_missing_positions
1236
- @missing_positions = []
1237
- @index.each do |e|
1238
- @missing_positions << e if @missing_values.key?(self[e])
1437
# Raises IndexError when one of the positions is an invalid position.
# A position is valid when it lies in -size...size, so negative positions
# counting from the end are accepted as long as they stay within the vector.
# @param [Array<Integer>] *positions positions to check
# @return [Array<Integer>] the positions that were checked
# @raise [IndexError] for the first position outside -size...size
def validate_positions *positions
  # The splat always delivers an Array, so no Integer wrapping is needed.
  positions.each do |pos|
    raise IndexError, "#{pos} is not a valid position." if pos >= size || pos < -size
  end
end
1241
1444
 
1242
- def try_create_index potential_index
1243
- if potential_index.is_a?(Daru::MultiIndex) || potential_index.is_a?(Daru::Index)
1244
- potential_index
1445
+ # coerce ranges, integers and array in appropriate ways
1446
+ def coerce_positions *positions
1447
+ if positions.size == 1
1448
+ case positions.first
1449
+ when Integer
1450
+ positions.first
1451
+ when Range
1452
+ size.times.to_a[positions.first]
1453
+ else
1454
+ raise ArgumentError, 'Unkown position type.'
1455
+ end
1245
1456
  else
1246
- Daru::Index.new(potential_index)
1457
+ positions
1247
1458
  end
1248
1459
  end
1249
1460
 
1250
- def element_from_numeric_index location
1251
- pos = index_for location
1252
- pos ? @data[pos] : nil
1461
# Helper method for []=.
# Assigns a new value to index entries that already exist.
# @param indexes index labels, splatted into @index.pos
# @param val value to store at every resolved position
def modify_vector(indexes, val)
  located = @index.pos(*indexes)

  # @index.pos answers with a single Numeric position or with a
  # collection of positions.
  return @data[located] = val if located.is_a?(Numeric)

  located.each { |pos| @data[pos] = val }
end
1254
1472
 
1255
- # Setup missing_values. The missing_values instance variable is set
1256
- # as a Hash for faster lookup times.
1257
- def set_missing_values values_arry # rubocop:disable Style/AccessorMethodName
1258
- @missing_values = {}
1259
- @missing_values[nil] = 0
1260
- if values_arry
1261
- values_arry.each do |e|
1262
- # If dtype is :gsl then missing values have to be converted to float
1263
- e = e.to_f if dtype == :gsl && e.is_a?(Numeric)
1264
- @missing_values[e] = 0
1473
# Helper method for []=.
# Extends the index with new entries and appends the value once per entry
# actually added.
# @param indexes index labels, splatted into @index.add
# @param val value appended for each newly added entry
def insert_vector(indexes, val)
  extended = @index.add(*indexes)
  added = extended.size - @index.size

  # Maybe create a += on the data accessor for this.
  added.times { @data << val }
  @index = extended
end
1481
+
1482
# Works like #[]= but inserts the entry when the index is not valid yet.
# It is there only to be accessed by Daru::DataFrame and is not meant for
# the user.
# @param indexes index labels to assign to
# @param val value to store
def set indexes, val
  # A nil forces the data to be cast to the :array dtype first.
  cast(dtype: :array) if val.nil? && dtype != :array
  guard_type_check(val)

  @index.valid?(*indexes) ? modify_vector(indexes, val) : insert_vector(indexes, val)

  update_position_cache
end
1496
+
1497
# Finds the category label of the interval that +val+ falls into.
# Labels have the form "low-high" and match those built by #cut_categories.
# @param [Array<Numeric>] partitions ascending interval boundaries
# @param [Numeric] val the value to classify
# @param [:left, :right] close_at selects which comparison and label form
#   is used (see the two branches below)
# @return [String] label of the matching interval
# @raise [ArgumentError] when val lies outside every interval, or when
#   close_at is neither :left nor :right
def cut_find_category partitions, val, close_at
  right_index =
    case close_at
    when :right then partitions.index { |bound| bound > val }
    when :left  then partitions.index { |bound| bound >= val }
    else raise ArgumentError, "Invalid parameter #{close_at} to close_at."
    end

  # nil: val is beyond the last boundary. 0: val is before the first
  # interval; without this check the left boundary would be looked up at
  # index -1 and wrap around to the last partition.
  raise ArgumentError, 'Invalid partition' if right_index.nil? || right_index.zero?

  lower = partitions[right_index - 1]
  upper = partitions[right_index]
  close_at == :right ? "#{lower}-#{upper - 1}" : "#{lower + 1}-#{upper}"
end
1513
+
1514
# Builds the category labels for every pair of consecutive partitions, in
# the same "low-high" form that #cut_find_category produces.
# @param [Array<Numeric>] partitions ascending interval boundaries
# @param [:left, :right] close_at selects the label form
# @return [Array<String>] one label per interval; empty when fewer than
#   two partitions are given
# @raise [ArgumentError] when close_at is neither :left nor :right
def cut_categories partitions, close_at
  case close_at
  when :right
    partitions.each_cons(2).map { |lower, upper| "#{lower}-#{upper - 1}" }
  when :left
    partitions.each_cons(2).map { |lower, upper| "#{lower + 1}-#{upper}" }
  else
    # Same error as #cut_find_category instead of silently returning nil.
    raise ArgumentError, "Invalid parameter #{close_at} to close_at."
  end
end
1526
+
1527
# Returns true if value is included in array.
# Behaves like Array#include?, except that a NaN value matches any NaN
# element (NaN is never == to itself, so include? alone would miss it).
def include_with_nan? array, value
  nan = ->(item) { item.respond_to?(:nan?) && item.nan? }

  nan.call(value) ? array.any?(&nan) : array.include?(value)
end
1536
+
1537
# Drops the memoized nil and NaN position lists so that #nil_positions and
# #nan_positions recompute them on their next call.
def update_position_cache
  @nil_positions = @nan_positions = nil
end
1268
1541
  end
1269
1542
  end