daru 0.1.3.1 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/.rspec +2 -1
  4. data/.rspec_formatter.rb +33 -0
  5. data/.rubocop.yml +26 -2
  6. data/History.md +38 -0
  7. data/README.md +22 -13
  8. data/Rakefile +50 -2
  9. data/benchmarks/csv_reading.rb +22 -0
  10. data/daru.gemspec +9 -2
  11. data/lib/daru.rb +36 -4
  12. data/lib/daru/accessors/array_wrapper.rb +6 -1
  13. data/lib/daru/accessors/dataframe_by_row.rb +10 -2
  14. data/lib/daru/accessors/gsl_wrapper.rb +1 -3
  15. data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
  16. data/lib/daru/category.rb +935 -0
  17. data/lib/daru/core/group_by.rb +29 -38
  18. data/lib/daru/core/merge.rb +186 -145
  19. data/lib/daru/core/query.rb +22 -11
  20. data/lib/daru/dataframe.rb +976 -885
  21. data/lib/daru/date_time/index.rb +166 -166
  22. data/lib/daru/date_time/offsets.rb +66 -77
  23. data/lib/daru/formatters/table.rb +54 -0
  24. data/lib/daru/helpers/array.rb +40 -0
  25. data/lib/daru/index.rb +476 -73
  26. data/lib/daru/io/io.rb +66 -45
  27. data/lib/daru/io/sql_data_source.rb +33 -62
  28. data/lib/daru/iruby/helpers.rb +38 -0
  29. data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
  30. data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
  31. data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
  32. data/lib/daru/iruby/templates/vector.html.erb +27 -0
  33. data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
  34. data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
  35. data/lib/daru/maths/arithmetic/vector.rb +4 -6
  36. data/lib/daru/maths/statistics/dataframe.rb +8 -15
  37. data/lib/daru/maths/statistics/vector.rb +120 -98
  38. data/lib/daru/monkeys.rb +12 -40
  39. data/lib/daru/plotting/gruff.rb +3 -0
  40. data/lib/daru/plotting/gruff/category.rb +49 -0
  41. data/lib/daru/plotting/gruff/dataframe.rb +91 -0
  42. data/lib/daru/plotting/gruff/vector.rb +57 -0
  43. data/lib/daru/plotting/nyaplot.rb +3 -0
  44. data/lib/daru/plotting/nyaplot/category.rb +34 -0
  45. data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
  46. data/lib/daru/plotting/nyaplot/vector.rb +46 -0
  47. data/lib/daru/vector.rb +694 -421
  48. data/lib/daru/version.rb +1 -1
  49. data/profile/_base.rb +23 -0
  50. data/profile/df_to_a.rb +10 -0
  51. data/profile/filter.rb +13 -0
  52. data/profile/joining.rb +13 -0
  53. data/profile/sorting.rb +12 -0
  54. data/profile/vector_each_with_index.rb +9 -0
  55. data/spec/accessors/wrappers_spec.rb +2 -4
  56. data/spec/categorical_spec.rb +1734 -0
  57. data/spec/core/group_by_spec.rb +52 -2
  58. data/spec/core/merge_spec.rb +63 -2
  59. data/spec/core/query_spec.rb +236 -80
  60. data/spec/dataframe_spec.rb +1373 -79
  61. data/spec/date_time/data_spec.rb +3 -5
  62. data/spec/date_time/index_spec.rb +154 -17
  63. data/spec/date_time/offsets_spec.rb +3 -4
  64. data/spec/fixtures/empties.dat +2 -0
  65. data/spec/fixtures/strings.dat +2 -0
  66. data/spec/formatters/table_formatter_spec.rb +99 -0
  67. data/spec/helpers_spec.rb +8 -0
  68. data/spec/index/categorical_index_spec.rb +168 -0
  69. data/spec/index/index_spec.rb +283 -0
  70. data/spec/index/multi_index_spec.rb +570 -0
  71. data/spec/io/io_spec.rb +31 -4
  72. data/spec/io/sql_data_source_spec.rb +0 -1
  73. data/spec/iruby/dataframe_spec.rb +172 -0
  74. data/spec/iruby/helpers_spec.rb +49 -0
  75. data/spec/iruby/multi_index_spec.rb +37 -0
  76. data/spec/iruby/vector_spec.rb +107 -0
  77. data/spec/math/arithmetic/dataframe_spec.rb +71 -13
  78. data/spec/math/arithmetic/vector_spec.rb +8 -10
  79. data/spec/math/statistics/dataframe_spec.rb +3 -5
  80. data/spec/math/statistics/vector_spec.rb +45 -55
  81. data/spec/monkeys_spec.rb +32 -9
  82. data/spec/plotting/dataframe_spec.rb +386 -0
  83. data/spec/plotting/vector_spec.rb +230 -0
  84. data/spec/shared/vector_display_spec.rb +215 -0
  85. data/spec/spec_helper.rb +23 -0
  86. data/spec/vector_spec.rb +905 -138
  87. metadata +143 -11
  88. data/.rubocop_todo.yml +0 -44
  89. data/lib/daru/plotting/dataframe.rb +0 -104
  90. data/lib/daru/plotting/vector.rb +0 -38
  91. data/spec/daru_spec.rb +0 -58
  92. data/spec/index_spec.rb +0 -375
@@ -0,0 +1,46 @@
1
+ module Daru
2
+ module Plotting
3
+ module Vector
4
+ module NyaplotLibrary
5
+ # Plots a Vector with Nyaplot on IRuby using the given options. Yields the
6
+ # plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
7
+ # to the block, which can be used for setting various options as per the
8
+ # Nyaplot API.
9
+ #
10
+ # == Options
11
+ # type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
12
+ #
13
+ # == Usage
14
+ # vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
15
+ # vector.plot(type: :bar) do |plot|
16
+ # plot.title "My first plot"
17
+ # plot.width 1200
18
+ # end
19
+ def plot opts={}
20
+ options = {
21
+ type: :scatter
22
+ }.merge(opts)
23
+
24
+ x_axis = options[:type] == :scatter ? Array.new(size) { |i| i } : @index.to_a
25
+ plot = Nyaplot::Plot.new
26
+ diagram = create_diagram plot, options[:type], x_axis
27
+
28
+ yield plot, diagram if block_given?
29
+
30
+ plot.show
31
+ end
32
+
33
+ private
34
+
35
+ def create_diagram plot, type, x_axis
36
+ case type
37
+ when :box, :histogram
38
+ plot.add(type, @data.to_a)
39
+ else
40
+ plot.add(type, x_axis, @data.to_a)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
@@ -1,16 +1,98 @@
1
1
  require 'daru/maths/arithmetic/vector.rb'
2
2
  require 'daru/maths/statistics/vector.rb'
3
- require 'daru/plotting/vector.rb'
3
+ require 'daru/plotting/gruff.rb'
4
+ require 'daru/plotting/nyaplot.rb'
4
5
  require 'daru/accessors/array_wrapper.rb'
5
6
  require 'daru/accessors/nmatrix_wrapper.rb'
6
7
  require 'daru/accessors/gsl_wrapper.rb'
8
+ require 'daru/category.rb'
7
9
 
8
10
  module Daru
9
- class Vector
11
+ class Vector # rubocop:disable Metrics/ClassLength
10
12
  include Enumerable
11
13
  include Daru::Maths::Arithmetic::Vector
12
14
  include Daru::Maths::Statistics::Vector
13
- include Daru::Plotting::Vector if Daru.has_nyaplot?
15
+ extend Gem::Deprecate
16
+
17
+ class << self
18
+ # Create a new vector by specifying the size and an optional value
19
+ # and block to generate values.
20
+ #
21
+ # == Description
22
+ #
23
+ # The *new_with_size* class method lets you create a Daru::Vector
24
+ # by specifying the size as the argument. The optional block, if
25
+ # supplied, is run once for populating each element in the Vector.
26
+ #
27
+ # The result of each run of the block is the value that is ultimately
28
+ # assigned to that position in the Vector.
29
+ #
30
+ # == Options
31
+ # :value
32
+ # All the rest like .new
33
+ def new_with_size n, opts={}, &block
34
+ value = opts.delete :value
35
+ block ||= ->(_) { value }
36
+ Daru::Vector.new Array.new(n, &block), opts
37
+ end
38
+
39
+ # Create a vector using (almost) any object
40
+ # * Array: flattened
41
+ # * Range: transformed using to_a
42
+ # * Daru::Vector
43
+ # * Numeric and string values
44
+ #
45
+ # == Description
46
+ #
47
+ # The `Vector.[]` class method creates a vector from almost any
48
+ # object that has a `#to_a` method defined on it. It is similar
49
+ # to R's `c` method.
50
+ #
51
+ # == Usage
52
+ #
53
+ # a = Daru::Vector[1,2,3,4,6..10]
54
+ # #=>
55
+ # # <Daru::Vector:99448510 @name = nil @size = 9 >
56
+ # # nil
57
+ # # 0 1
58
+ # # 1 2
59
+ # # 2 3
60
+ # # 3 4
61
+ # # 4 6
62
+ # # 5 7
63
+ # # 6 8
64
+ # # 7 9
65
+ # # 8 10
66
+ def [](*indexes)
67
+ values = indexes.map do |a|
68
+ a.respond_to?(:to_a) ? a.to_a : a
69
+ end.flatten
70
+ Daru::Vector.new(values)
71
+ end
72
+
73
+ def _load(data) # :nodoc:
74
+ h = Marshal.load(data)
75
+ Daru::Vector.new(h[:data],
76
+ index: h[:index],
77
+ name: h[:name],
78
+ dtype: h[:dtype], missing_values: h[:missing_values])
79
+ end
80
+
81
+ def coerce(data, options={})
82
+ case data
83
+ when Daru::Vector
84
+ data
85
+ when Array, Hash
86
+ new(data, options)
87
+ else
88
+ raise ArgumentError, "Can't coerce #{data.class} to #{self}"
89
+ end
90
+ end
91
+ end
92
+
93
+ def size
94
+ @data.size
95
+ end
14
96
 
15
97
  def each(&block)
16
98
  return to_enum(:each) unless block_given?
@@ -26,17 +108,17 @@ module Daru
26
108
  self
27
109
  end
28
110
 
29
- def each_with_index
111
+ def each_with_index &block
30
112
  return to_enum(:each_with_index) unless block_given?
31
113
 
32
- @index.each { |i| yield(self[i], i) }
114
+ @data.to_a.zip(@index.to_a).each(&block)
115
+
33
116
  self
34
117
  end
35
118
 
36
119
  def map!(&block)
37
120
  return to_enum(:map!) unless block_given?
38
121
  @data.map!(&block)
39
- update
40
122
  self
41
123
  end
42
124
 
@@ -44,8 +126,6 @@ module Daru
44
126
  attr_reader :name
45
127
  # The row index. Can be either Daru::Index or Daru::MultiIndex.
46
128
  attr_reader :index
47
- # The total number of elements of the vector.
48
- attr_reader :size
49
129
  # The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
50
130
  attr_reader :dtype
51
131
  # If the dtype is :nmatrix, this attribute represents the data type of the
@@ -54,13 +134,16 @@ module Daru
54
134
  attr_reader :nm_dtype
55
135
  # An Array of the positions in the vector that are being treated as 'missing'.
56
136
  attr_reader :missing_positions
137
+ deprecate :missing_positions, :indexes, 2016, 10
57
138
  # Store a hash of labels for values. Supplementary only. Recommend using index
58
139
  # for proper usage.
59
140
  attr_accessor :labels
60
141
  # Store vector data in an array
61
142
  attr_reader :data
62
- # Attach arbitrary metadata to vector (usu a hash)
63
- attr_accessor :metadata
143
+ # Plotting library being used for this vector
144
+ attr_reader :plotting_library
145
+ # TODO: Make private.
146
+ attr_reader :nil_positions, :nan_positions
64
147
 
65
148
  # Create a Vector object.
66
149
  #
@@ -93,102 +176,27 @@ module Daru
93
176
  # vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
94
177
  # vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
95
178
  def initialize source, opts={}
96
- index = nil
97
- if source.is_a?(Hash)
98
- index = source.keys
99
- source = source.values
179
+ if opts[:type] == :category
180
+ # Initialize category type vector
181
+ extend Daru::Category
182
+ initialize_category source, opts
100
183
  else
101
- index = opts[:index]
102
- source ||= []
184
+ # Initialize non-category type vector
185
+ initialize_vector source, opts
103
186
  end
104
- name = opts[:name]
105
- set_name name
106
-
107
- @metadata = opts[:metadata] || {}
108
-
109
- @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
110
- @index = try_create_index(index || @data.size)
111
-
112
- if @index.size > @data.size
113
- cast(dtype: :array) # NM with nils seg faults
114
- (@index.size - @data.size).times { @data << nil }
115
- elsif @index.size < @data.size
116
- raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
117
- end
118
-
119
- @possibly_changed_type = true
120
- set_missing_values opts[:missing_values]
121
- set_missing_positions
122
- set_size
123
187
  end
124
188
 
125
- # Create a new vector by specifying the size and an optional value
126
- # and block to generate values.
127
- #
128
- # == Description
129
- #
130
- # The *new_with_size* class method lets you create a Daru::Vector
131
- # by specifying the size as the argument. The optional block, if
132
- # supplied, is run once for populating each element in the Vector.
133
- #
134
- # The result of each run of the block is the value that is ultimately
135
- # assigned to that position in the Vector.
136
- #
137
- # == Options
138
- # :value
139
- # All the rest like .new
140
- def self.new_with_size n, opts={}, &block
141
- value = opts[:value]
142
- opts.delete :value
143
- if block
144
- Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
189
+ def plotting_library= lib
190
+ case lib
191
+ when :gruff, :nyaplot
192
+ @plotting_library = lib
193
+ extend Module.const_get(
194
+ "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
195
+ ) if Daru.send("has_#{lib}?".to_sym)
145
196
  else
146
- Daru::Vector.new Array.new(n) { value }, opts
147
- end
148
- end
149
-
150
- # Create a vector using (almost) any object
151
- # * Array: flattened
152
- # * Range: transformed using to_a
153
- # * Daru::Vector
154
- # * Numeric and string values
155
- #
156
- # == Description
157
- #
158
- # The `Vector.[]` class method creates a vector from almost any
159
- # object that has a `#to_a` method defined on it. It is similar
160
- # to R's `c` method.
161
- #
162
- # == Usage
163
- #
164
- # a = Daru::Vector[1,2,3,4,6..10]
165
- # #=>
166
- # # <Daru::Vector:99448510 @name = nil @size = 9 >
167
- # # nil
168
- # # 0 1
169
- # # 1 2
170
- # # 2 3
171
- # # 3 4
172
- # # 4 6
173
- # # 5 7
174
- # # 6 8
175
- # # 7 9
176
- # # 8 10
177
- def self.[](*args)
178
- values = []
179
- args.each do |a|
180
- case a
181
- when Array
182
- values.concat a.flatten
183
- when Daru::Vector
184
- values.concat a.to_a
185
- when Range
186
- values.concat a.to_a
187
- else
188
- values << a
189
- end
197
+ raise ArguementError, "Plotting library #{lib} not supported. "\
198
+ 'Supported libraries are :nyaplot and :gruff'
190
199
  end
191
- Daru::Vector.new(values)
192
200
  end
193
201
 
194
202
  # Get one or more elements with specified index or a range.
@@ -203,19 +211,63 @@ module Daru
203
211
  # For vectors employing hierarchical multi index
204
212
  #
205
213
  def [](*input_indexes)
206
- # Get a proper index object
207
- indexes = @index[*input_indexes]
214
+ # Get array of positions indexes
215
+ positions = @index.pos(*input_indexes)
208
216
 
209
217
  # If one object is asked return it
210
- return @data[indexes] if indexes.is_a? Numeric
218
+ return @data[positions] if positions.is_a? Numeric
211
219
 
212
- # Form a new Vector using indexes and return it
220
+ # Form a new Vector using positional indexes
213
221
  Daru::Vector.new(
214
- indexes.map { |loc| @data[@index[loc]] },
215
- name: @name, metadata: @metadata.dup, index: indexes.conform(input_indexes), dtype: @dtype
222
+ positions.map { |loc| @data[loc] },
223
+ name: @name,
224
+ index: @index.subset(*input_indexes), dtype: @dtype
216
225
  )
217
226
  end
218
227
 
228
+ # Returns vector of values given positional values
229
+ # @param [Array<object>] *positions positional values
230
+ # @return [object] vector
231
+ # @example
232
+ # dv = Daru::Vector.new 'a'..'e'
233
+ # dv.at 0, 1, 2
234
+ # # => #<Daru::Vector(3)>
235
+ # # 0 a
236
+ # # 1 b
237
+ # # 2 c
238
+ def at *positions
239
+ # to be used to form index
240
+ original_positions = positions
241
+ positions = coerce_positions(*positions)
242
+ validate_positions(*positions)
243
+
244
+ if positions.is_a? Integer
245
+ @data[positions]
246
+ else
247
+ values = positions.map { |pos| @data[pos] }
248
+ Daru::Vector.new values, index: @index.at(*original_positions), dtype: dtype
249
+ end
250
+ end
251
+
252
+ # Change value at given positions
253
+ # @param [Array<object>] *positions positional values
254
+ # @param [object] val value to assign
255
+ # @example
256
+ # dv = Daru::Vector.new 'a'..'e'
257
+ # dv.set_at [0, 1], 'x'
258
+ # dv
259
+ # # => #<Daru::Vector(5)>
260
+ # # 0 x
261
+ # # 1 x
262
+ # # 2 c
263
+ # # 3 d
264
+ # # 4 e
265
+ def set_at positions, val
266
+ validate_positions(*positions)
267
+ positions.map { |pos| @data[pos] = val }
268
+ update_position_cache
269
+ end
270
+
219
271
  # Just like in Hashes, you can specify the index label of the Daru::Vector
220
272
  # and assign an element at that place in the Daru::Vector.
221
273
  #
@@ -229,57 +281,14 @@ module Daru
229
281
  # # a 999
230
282
  # # b 2
231
283
  # # c 3
232
- def []=(*location, value)
233
- cast(dtype: :array) if value.nil? && dtype != :array
234
-
235
- @possibly_changed_type = true if @type == :object && (value.nil? ||
236
- value.is_a?(Numeric))
237
- @possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
238
- !value.nil?)
239
-
240
- pos = @index[*location]
241
-
242
- if pos.is_a?(Numeric)
243
- @data[pos] = value
244
- else
245
- begin
246
- pos.each { |tuple| self[tuple] = value }
247
- rescue NoMethodError
248
- raise IndexError, "Specified index #{pos.inspect} does not exist."
249
- end
250
- end
251
-
252
- set_size
253
- set_missing_positions unless Daru.lazy_update
254
- end
284
+ def []=(*indexes, val)
285
+ cast(dtype: :array) if val.nil? && dtype != :array
255
286
 
256
- # The values to be treated as 'missing'. *nil* is the default missing
257
- # type. To set missing values see the missing_values= method.
258
- def missing_values
259
- @missing_values.keys
260
- end
287
+ guard_type_check(val)
261
288
 
262
- # Assign an Array to treat certain values as 'missing'.
263
- #
264
- # == Usage
265
- #
266
- # v = Daru::Vector.new [1,2,3,4,5]
267
- # v.missing_values = [3]
268
- # v.update
269
- # v.missing_positions
270
- # #=> [2]
271
- def missing_values= values
272
- set_missing_values values
273
- set_missing_positions unless Daru.lazy_update
274
- end
289
+ modify_vector(indexes, val)
275
290
 
276
- # Method for updating the metadata (i.e. missing value positions) of the
277
- # after assingment/deletion etc. are complete. This is provided so that
278
- # time is not wasted in creating the metadata for the vector each time
279
- # assignment/deletion of elements is done. Updating data this way is called
280
- # lazy loading. To set or unset lazy loading, see the .lazy_update= method.
281
- def update
282
- Daru.lazy_update and set_missing_positions
291
+ update_position_cache
283
292
  end
284
293
 
285
294
  # Two vectors are equal if they have the exact same index values corresponding
@@ -287,7 +296,7 @@ module Daru
287
296
  def == other
288
297
  case other
289
298
  when Daru::Vector
290
- @index == other.index && @size == other.size &&
299
+ @index == other.index && size == other.size &&
291
300
  @index.all? { |index| self[index] == other[index] }
292
301
  else
293
302
  super
@@ -405,8 +414,8 @@ module Daru
405
414
  # # 11 5
406
415
  # # 13 5
407
416
  # # 15 1
408
- def where bool_arry
409
- Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, dtype
417
+ def where bool_array
418
+ Daru::Core::Query.vector_where self, bool_array
410
419
  end
411
420
 
412
421
  def head q=10
@@ -414,18 +423,41 @@ module Daru
414
423
  end
415
424
 
416
425
  def tail q=10
417
- self[(@size - q)..(@size-1)]
426
+ start = [size - q, 0].max
427
+ self[start..(size-1)]
418
428
  end
419
429
 
420
430
  def empty?
421
431
  @index.empty?
422
432
  end
423
433
 
434
+ def numeric?
435
+ type == :numeric
436
+ end
437
+
438
+ def object?
439
+ type == :object
440
+ end
441
+
424
442
  # Reports whether missing data is present in the Vector.
425
443
  def has_missing_data?
426
- !missing_positions.empty?
444
+ !indexes(*Daru::MISSING_VALUES).empty?
427
445
  end
428
446
  alias :flawed? :has_missing_data?
447
+ deprecate :has_missing_data?, :include_values?, 2016, 10
448
+ deprecate :flawed?, :include_values?, 2016, 10
449
+
450
+ # Check if any one of mentioned values occur in the vector
451
+ # @param [Array] *values values to check for
452
+ # @return [true, false] returns true if any one of specified values
453
+ # occur in the vector
454
+ # @example
455
+ # dv = Daru::Vector.new [1, 2, 3, 4, nil]
456
+ # dv.include_values? nil, Float::NAN
457
+ # # => true
458
+ def include_values?(*values)
459
+ values.any? { |v| include_with_nan? @data, v }
460
+ end
429
461
 
430
462
  # Append an element to the vector by specifying the element and index
431
463
  def concat element, index
@@ -434,8 +466,7 @@ module Daru
434
466
  @index |= [index]
435
467
  @data[@index[index]] = element
436
468
 
437
- set_size
438
- set_missing_positions unless Daru.lazy_update
469
+ update_position_cache
439
470
  end
440
471
  alias :push :concat
441
472
  alias :<< :concat
@@ -463,8 +494,7 @@ module Daru
463
494
  @data.delete_at @index[index]
464
495
  @index = Daru::Index.new(@index.to_a - [index])
465
496
 
466
- set_size
467
- set_missing_positions unless Daru.lazy_update
497
+ update_position_cache
468
498
  end
469
499
 
470
500
  # The type of data contained in the vector. Can be :object or :numeric. If
@@ -489,6 +519,16 @@ module Daru
489
519
  @type
490
520
  end
491
521
 
522
+ # Tells if vector is categorical or not.
523
+ # @return [true, false] true if vector is of type category, false otherwise
524
+ # @example
525
+ # dv = Daru::Vector.new [1, 2, 3], type: :category
526
+ # dv.category?
527
+ # # => true
528
+ def category?
529
+ type == :category
530
+ end
531
+
492
532
  # Get index of element
493
533
  def index_of element
494
534
  case dtype
@@ -500,11 +540,9 @@ module Daru
500
540
  # Keep only unique elements of the vector along with their indexes.
501
541
  def uniq
502
542
  uniq_vector = @data.uniq
503
- new_index = uniq_vector.each_with_object([]) do |element, acc|
504
- acc << index_of(element)
505
- end
543
+ new_index = uniq_vector.map { |element| index_of(element) }
506
544
 
507
- Daru::Vector.new uniq_vector, name: @name, metadata: @metadata.dup, index: new_index, dtype: @dtype
545
+ Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
508
546
  end
509
547
 
510
548
  def any? &block
@@ -531,47 +569,46 @@ module Daru
531
569
  # v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
532
570
  # # Say you want to sort these strings by length.
533
571
  # v.sort(ascending: false) { |a,b| a.length <=> b.length }
534
- def sort opts={}
535
- opts = {
536
- ascending: true
537
- }.merge(opts)
538
-
539
- vector_index = @data.each_with_index
540
- vector_index =
541
- if block_given?
542
- vector_index.sort { |a,b| yield(a[0], b[0]) }
543
- else
544
- vector_index.sort { |(av, ai), (bv, bi)|
545
- if !av.nil? && !bv.nil?
546
- av <=> bv
547
- elsif av.nil? && bv.nil?
548
- ai <=> bi
549
- elsif av.nil?
550
- opts[:ascending] ? -1 : 1
551
- else
552
- opts[:ascending] ? 1 : -1
553
- end
554
- }
555
- end
556
- vector_index.reverse! unless opts[:ascending]
572
+ def sort opts={}, &block
573
+ opts = {ascending: true}.merge(opts)
574
+
575
+ vector_index = resort_index(@data.each_with_index, opts, &block)
557
576
  vector, index = vector_index.transpose
558
- old_index = @index.to_a
559
- index = index.map { |i| old_index[i] }
560
577
 
561
- Daru::Vector.new(vector, index: index, name: @name, metadata: @metadata.dup, dtype: @dtype)
578
+ index = @index.reorder index
579
+
580
+ Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
581
+ end
582
+
583
+ DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
584
+ case
585
+ when lv.nil? && rv.nil?
586
+ li <=> ri
587
+ when lv.nil?
588
+ -1
589
+ when rv.nil?
590
+ 1
591
+ else
592
+ lv <=> rv
593
+ end
594
+ }
595
+
596
+ def resort_index vector_index, opts
597
+ if block_given?
598
+ vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
599
+ else
600
+ vector_index.sort(&DEFAULT_SORTER)
601
+ end
602
+ .tap { |res| res.reverse! unless opts[:ascending] }
562
603
  end
563
604
 
564
605
  # Just sort the data and get an Array in return using Enumerable#sort.
565
606
  # Non-destructive.
607
+ # :nocov:
566
608
  def sorted_data &block
567
609
  @data.to_a.sort(&block)
568
610
  end
569
-
570
- # Returns *true* if the value passed is actually exists or is not marked as
571
- # a *missing value*.
572
- def exists? value
573
- !@missing_values.key?(self[index_of(value)])
574
- end
611
+ # :nocov:
575
612
 
576
613
  # Like map, but returns a Daru::Vector with the returned values.
577
614
  def recode dt=nil, &block
@@ -593,19 +630,12 @@ module Daru
593
630
  def delete_if
594
631
  return to_enum(:delete_if) unless block_given?
595
632
 
596
- keep_e = []
597
- keep_i = []
598
- each_with_index do |n, i|
599
- unless yield(n)
600
- keep_e << n
601
- keep_i << i
602
- end
603
- end
633
+ keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
604
634
 
605
635
  @data = cast_vector_to @dtype, keep_e
606
636
  @index = Daru::Index.new(keep_i)
607
- set_missing_positions unless Daru.lazy_update
608
- set_size
637
+
638
+ update_position_cache
609
639
 
610
640
  self
611
641
  end
@@ -614,32 +644,16 @@ module Daru
614
644
  def keep_if
615
645
  return to_enum(:keep_if) unless block_given?
616
646
 
617
- keep_e = []
618
- keep_i = []
619
- each_with_index do |n, i|
620
- if yield(n)
621
- keep_e << n
622
- keep_i << i
623
- end
624
- end
625
-
626
- @data = cast_vector_to @dtype, keep_e
627
- @index = Daru::Index.new(keep_i)
628
- set_missing_positions unless Daru.lazy_update
629
- set_size
630
-
631
- self
647
+ delete_if { |val| !yield(val) }
632
648
  end
633
649
 
634
650
  # Reports all values that don't comply with a condition.
635
651
  # Returns a hash with the index of data and the invalid data.
636
652
  def verify
637
- h = {}
638
- (0...size).each do |i|
639
- h[i] = @data[i] unless yield(@data[i])
640
- end
641
-
642
- h
653
+ (0...size)
654
+ .map { |i| [i, @data[i]] }
655
+ .reject { |_i, val| yield(val) }
656
+ .to_h
643
657
  end
644
658
 
645
659
  # Return an Array with the data split by a separator.
@@ -674,29 +688,19 @@ module Daru
674
688
  #
675
689
  def split_by_separator sep=','
676
690
  split_data = splitted sep
677
- factors = split_data.flatten.uniq.compact
678
-
679
- out = factors.map { |x| [x, []] }.to_h
680
-
681
- split_data.each do |r|
682
- if r.nil?
683
- factors.each do |f|
684
- out[f].push(nil)
685
- end
686
- else
687
- factors.each do |f|
688
- out[f].push(r.include?(f) ? 1 : 0)
689
- end
690
- end
691
- end
692
-
693
- out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
691
+ split_data
692
+ .flatten.uniq.compact.map do |key|
693
+ [
694
+ key,
695
+ Daru::Vector.new(split_data.map { |v| split_value(key, v) })
696
+ ]
697
+ end.to_h
694
698
  end
695
699
 
696
700
  def split_by_separator_freq(sep=',')
697
- split_by_separator(sep).map do |k, v|
698
- [k, v.inject { |s,x| s+x.to_i }]
699
- end.to_h
701
+ split_by_separator(sep).map { |k, v|
702
+ [k, v.map(&:to_i).inject(:+)]
703
+ }.to_h
700
704
  end
701
705
 
702
706
  def reset_index!
@@ -718,23 +722,15 @@ module Daru
718
722
  # # 1 false
719
723
  # # 2 false
720
724
  # # 3 true
725
+ #
721
726
  def is_nil?
722
- nil_truth_vector = clone_structure
723
- @index.each do |idx|
724
- nil_truth_vector[idx] = self[idx].nil? ? true : false
725
- end
726
-
727
- nil_truth_vector
727
+ # FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
728
+ recode(&:nil?)
728
729
  end
729
730
 
730
731
  # Opposite of #is_nil?
731
732
  def not_nil?
732
- nil_truth_vector = clone_structure
733
- @index.each do |idx|
734
- nil_truth_vector[idx] = self[idx].nil? ? false : true
735
- end
736
-
737
- nil_truth_vector
733
+ recode { |v| !v.nil? }
738
734
  end
739
735
 
740
736
  # Replace all nils in the vector with the value passed as an argument. Destructive.
@@ -744,7 +740,7 @@ module Daru
744
740
  #
745
741
  # * +replacement+ - The value which should replace all nils
746
742
  def replace_nils! replacement
747
- missing_positions.each do |idx|
743
+ indexes(*Daru::MISSING_VALUES).each do |idx|
748
744
  self[idx] = replacement
749
745
  end
750
746
 
@@ -765,13 +761,13 @@ module Daru
765
761
  # ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
766
762
  # ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
767
763
  def lag k=1
768
- return dup if k == 0
764
+ return dup if k.zero?
769
765
 
770
766
  dat = @data.to_a.dup
771
767
  (dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
772
768
  (0...k).each { |i| dat[i] = nil }
773
769
 
774
- Daru::Vector.new(dat, index: @index, name: @name, metadata: @metadata.dup)
770
+ Daru::Vector.new(dat, index: @index, name: @name)
775
771
  end
776
772
 
777
773
  def detach_index
@@ -788,7 +784,19 @@ module Daru
788
784
 
789
785
  # number of non-missing elements
790
786
  def n_valid
791
- @size - missing_positions.size
787
+ size - indexes(*Daru::MISSING_VALUES).size
788
+ end
789
+ deprecate :n_valid, :count_values, 2016, 10
790
+
791
+ # Count the number of values specified
792
+ # @param [Array] *values values to count for
793
+ # @return [Integer] the number of times the values mentioned occurs
794
+ # @example
795
+ # dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
796
+ # dv.count_values nil
797
+ # # => 2
798
+ def count_values(*values)
799
+ positions(*values).size
792
800
  end
793
801
 
794
802
  # Returns *true* if an index exists
@@ -796,6 +804,11 @@ module Daru
796
804
  @index.include? index
797
805
  end
798
806
 
807
+ # @return [Daru::DataFrame] the vector as a single-vector dataframe
808
+ def to_df
809
+ Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
810
+ end
811
+
799
812
  # Convert Vector to a horizontal or vertical Ruby Matrix.
800
813
  #
801
814
  # == Arguments
@@ -811,11 +824,39 @@ module Daru
811
824
  end
812
825
  end
813
826
 
827
+ # Convert vector to nmatrix object
828
+ # @param [Symbol] axis :horizontal or :vertical
829
+ # @return [NMatrix] NMatrix object containing all values of the vector
830
+ # @example
831
+ # dv = Daru::Vector.new [1, 2, 3]
832
+ # dv.to_nmatrix
833
+ # # =>
834
+ # # [
835
+ # # [1, 2, 3] ]
836
+ def to_nmatrix axis=:horizontal
837
+ raise ArgumentError, 'Can not convert to nmatrix'\
838
+ 'because the vector is numeric' unless numeric? && !include?(nil)
839
+
840
+ case axis
841
+ when :horizontal
842
+ NMatrix.new [1, size], to_a
843
+ when :vertical
844
+ NMatrix.new [size, 1], to_a
845
+ else
846
+ raise ArgumentError, 'Invalid axis specified. '\
847
+ 'Valid axis are :horizontal and :vertical'
848
+ end
849
+ end
850
+
814
851
  # If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
815
852
  # the stored GSL::Vector object.
816
853
  def to_gsl
817
854
  raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
818
- dtype == :gsl ? @data.data : GSL::Vector.alloc(only_valid(:array).to_a)
855
+ if dtype == :gsl
856
+ @data.data
857
+ else
858
+ GSL::Vector.alloc(reject_values(*Daru::MISSING_VALUES).to_a)
859
+ end
819
860
  end
820
861
 
821
862
  # Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
@@ -835,30 +876,12 @@ module Daru
835
876
 
836
877
  # Convert to html for iruby
837
878
  def to_html threshold=30
838
- name = @name || 'nil'
839
- html = '<table>' \
840
- '<tr>' \
841
- '<th colspan="2">' \
842
- "Daru::Vector:#{object_id} " + " size: #{size}" \
843
- '</th>' \
844
- '</tr>'
845
- html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
846
- @index.each_with_index do |index, num|
847
- html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
848
-
849
- next if num <= threshold
850
- html += '<tr><td>...</td><td>...</td></tr>'
851
-
852
- last_index = @index.to_a.last
853
- html += '<tr>' \
854
- '<td>' + last_index.to_s + '</td>' \
855
- '<td>' + self[last_index].to_s + '</td>' \
856
- '</tr>'
857
- break
858
- end
859
- html += '</table>'
860
-
861
- html
879
+ path = if index.is_a?(MultiIndex)
880
+ File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
881
+ else
882
+ File.expand_path('../iruby/templates/vector.html.erb', __FILE__)
883
+ end
884
+ ERB.new(File.read(path).strip).result(binding)
862
885
  end
863
886
 
864
887
  def to_s
@@ -870,10 +893,11 @@ module Daru
870
893
  ReportBuilder.new(no_title: true).add(self).send(method)
871
894
  end
872
895
 
873
- def report_building b
896
+ # :nocov:
897
+ def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
874
898
  b.section(name: name) do |s|
875
899
  s.text "n :#{size}"
876
- s.text "n valid:#{n_valid}"
900
+ s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
877
901
  if @type == :object
878
902
  s.text "factors: #{factors.to_a.join(',')}"
879
903
  s.text "mode: #{mode}"
@@ -881,7 +905,7 @@ module Daru
881
905
  s.table(name: 'Distribution') do |t|
882
906
  frequencies.sort_by(&:to_s).each do |k,v|
883
907
  key = @index.include?(k) ? @index[k] : k
884
- t.row [key, v, ('%0.2f%%' % (v.quo(n_valid)*100))]
908
+ t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
885
909
  end
886
910
  end
887
911
  end
@@ -898,47 +922,71 @@ module Daru
898
922
  end
899
923
  end
900
924
  end
925
+ # :nocov:
901
926
 
902
927
  # Over rides original inspect for pretty printing in irb
903
928
  def inspect spacing=20, threshold=15
904
- longest =
905
- [
906
- @name.to_s.size,
907
- (@index.to_a.map(&:to_s).map(&:size).max || 0),
908
- (@data.map(&:to_s).map(&:size).max || 0),
909
- 3 # 'nil'.size
910
- ].max
911
-
912
- content = ''
913
- longest = spacing if longest > spacing
914
- name = @name || 'nil'
915
- metadata = @metadata || 'nil'
916
- formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"
917
- content += "\n#<#{self.class}:#{object_id} @name = #{name} @metadata = #{metadata} @size = #{size} >"
918
-
919
- content += formatter % ['', name]
920
- @index.each_with_index do |index, num|
921
- content += formatter % [index.to_s, (self[*index] || 'nil').to_s]
922
- if num > threshold
923
- content += formatter % ['...', '...']
924
- break
925
- end
929
+ row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
930
+
931
+ "#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
932
+ Formatters::Table.format(
933
+ to_a.lazy.map { |v| [v] },
934
+ headers: @name && [@name],
935
+ row_headers: row_headers,
936
+ threshold: threshold,
937
+ spacing: spacing
938
+ )
939
+ end
940
+
941
+ # Sets new index for vector. Preserves index->value correspondence.
942
+ # Sets nil for new index keys absent from original index.
943
+ # @note Unlike #reorder! which takes positions as input it takes
944
+ # index as an input to reorder the vector
945
+ # @param [Daru::Index, Daru::MultiIndex] new_index new index to order with
946
+ # @return [Daru::Vector] vector reindexed with new index
947
+ def reindex! new_index
948
+ values = []
949
+ each_with_index do |val, i|
950
+ values[new_index[i]] = val if new_index.include?(i)
926
951
  end
927
- content += "\n"
952
+ values.fill(nil, values.size, new_index.size - values.size)
953
+
954
+ @data = cast_vector_to @dtype, values
955
+ @index = new_index
928
956
 
929
- content
957
+ update_position_cache
958
+
959
+ self
960
+ end
961
+
962
+ # Reorder the vector with given positions
963
+ # @note Unlike #reindex! which takes index as input, it takes
964
+ # positions as an input to reorder the vector
965
+ # @param [Array] order the order to reorder the vector with
966
+ # @return reordered vector
967
+ # @example
968
+ # dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
969
+ # dv.reorder! [2, 1, 0]
970
+ # # => #<Daru::Vector(3)>
971
+ # # a 1
972
+ # # b 2
973
+ # # c 3
974
+ def reorder! order
975
+ @index = @index.reorder order
976
+ @data = order.map { |i| @data[i] }
977
+ update_position_cache
978
+ self
979
+ end
980
+
981
+ # Non-destructive version of #reorder!
982
+ def reorder order
983
+ dup.reorder! order
930
984
  end
931
985
 
932
986
  # Create a new vector with a different index, and preserve the indexing of
933
987
  # current elements.
934
988
  def reindex new_index
935
- vector = Daru::Vector.new([], index: new_index, name: @name, metadata: @metadata.dup)
936
-
937
- new_index.each do |idx|
938
- vector[idx] = @index.include?(idx) ? self[idx] : nil
939
- end
940
-
941
- vector
989
+ dup.reindex!(new_index)
942
990
  end
943
991
 
944
992
  def index= idx
@@ -956,17 +1004,16 @@ module Daru
956
1004
  #
957
1005
  # @param new_name [Symbol] The new name.
958
1006
  def rename new_name
959
- if new_name.is_a?(Numeric)
960
- @name = new_name
961
- return
962
- end
963
-
964
1007
  @name = new_name
1008
+ self
965
1009
  end
966
1010
 
967
- # Duplicate elements and indexes
1011
+ alias_method :name=, :rename
1012
+
1013
+ # Duplicated a vector
1014
+ # @return [Daru::Vector] duplicated vector
968
1015
  def dup
969
- Daru::Vector.new @data.dup, name: @name, metadata: @metadata.dup, index: @index.dup
1016
+ Daru::Vector.new @data.dup, name: @name, index: @index.dup
970
1017
  end
971
1018
 
972
1019
  # == Bootstrap
@@ -1019,8 +1066,8 @@ module Daru
1019
1066
  #
1020
1067
  # == Reference:
1021
1068
  # * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife.
1022
- def jackknife(estimators, k=1)
1023
- raise "n should be divisible by k:#{k}" unless size % k==0
1069
+ def jackknife(estimators, k=1) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
1070
+ raise "n should be divisible by k:#{k}" unless (size % k).zero?
1024
1071
 
1025
1072
  nb = (size / k).to_i
1026
1073
  h_est, es, ps = prepare_bootstrap(estimators)
@@ -1057,62 +1104,123 @@ module Daru
1057
1104
  # vector, setting this to false will return the same vector.
1058
1105
  # Otherwise, a duplicate will be returned irrespective of
1059
1106
  # presence of missing data.
1060
- def only_valid as_a=:vector, duplicate=true
1061
- return dup if !has_missing_data? && as_a == :vector && duplicate
1062
- return self if !has_missing_data? && as_a == :vector && !duplicate
1063
- return to_a if !has_missing_data? && as_a != :vector
1064
-
1065
- new_index = @index.to_a - missing_positions
1066
- new_vector = new_index.map do |idx|
1067
- self[idx]
1068
- end
1069
1107
 
1070
- return new_vector if as_a != :vector
1108
+ def only_valid as_a=:vector, _duplicate=true
1109
+ # FIXME: Now duplicate is just ignored.
1110
+ # There are no spec that fail on this case, so I'll leave it
1111
+ # this way for now - zverok, 2016-05-07
1071
1112
 
1072
- Daru::Vector.new new_vector, index: new_index, name: @name, metadata: @metadata.dup, dtype: dtype
1113
+ new_index = @index.to_a - indexes(*Daru::MISSING_VALUES)
1114
+ new_vector = new_index.map { |idx| self[idx] }
1115
+
1116
+ if as_a == :vector
1117
+ Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
1118
+ else
1119
+ new_vector
1120
+ end
1121
+ end
1122
+ deprecate :only_valid, :reject_values, 2016, 10
1123
+
1124
+ # Return a vector with specified values removed
1125
+ # @param [Array] *values values to reject from resultant vector
1126
+ # @return [Daru::Vector] vector with specified values removed
1127
+ # @example
1128
+ # dv = Daru::Vector.new [1, 2, nil, Float::NAN]
1129
+ # dv.reject_values nil, Float::NAN
1130
+ # # => #<Daru::Vector(2)>
1131
+ # # 0 1
1132
+ # # 1 2
1133
+ def reject_values(*values)
1134
+ resultant_pos = size.times.to_a - positions(*values)
1135
+ dv = at(*resultant_pos)
1136
+ # Handle the case when number of positions is 1
1137
+ # and hence #at doesn't return a vector
1138
+ if dv.is_a?(Daru::Vector)
1139
+ dv
1140
+ else
1141
+ pos = resultant_pos.first
1142
+ at(pos..pos)
1143
+ end
1144
+ end
1145
+
1146
+ # Return indexes of values specified
1147
+ # @param [Array] *values values to find indexes for
1148
+ # @return [Array] array of indexes of values specified
1149
+ # @example
1150
+ # dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
1151
+ # dv.indexes nil, Float::NAN
1152
+ # # => [13, 14]
1153
+ def indexes(*values)
1154
+ index.to_a.values_at(*positions(*values))
1155
+ end
1156
+
1157
+ # Replaces specified values with a new value
1158
+ # @param [Array] old_values array of values to replace
1159
+ # @param [object] new_value new value to replace with
1160
+ # @note It performs the replace in place.
1161
+ # @return [Daru::Vector] Same vector itself with values
1162
+ # replaced with new value
1163
+ # @example
1164
+ # dv = Daru::Vector.new [1, 2, :a, :b]
1165
+ # dv.replace_values [:a, :b], nil
1166
+ # dv
1167
+ # # =>
1168
+ # # #<Daru::Vector:19903200 @name = nil @metadata = {} @size = 4 >
1169
+ # # nil
1170
+ # # 0 1
1171
+ # # 1 2
1172
+ # # 2 nil
1173
+ # # 3 nil
1174
+ def replace_values(old_values, new_value)
1175
+ old_values = [old_values] unless old_values.is_a? Array
1176
+ size.times do |pos|
1177
+ set_at([pos], new_value) if include_with_nan? old_values, at(pos)
1178
+ end
1179
+ self
1073
1180
  end
1074
1181
 
1075
1182
  # Returns a Vector containing only missing data (preserves indexes).
1076
1183
  def only_missing as_a=:vector
1077
1184
  if as_a == :vector
1078
- self[*missing_positions]
1185
+ self[*indexes(*Daru::MISSING_VALUES)]
1079
1186
  elsif as_a == :array
1080
- self[*missing_positions].to_a
1187
+ self[*indexes(*Daru::MISSING_VALUES)].to_a
1081
1188
  end
1082
1189
  end
1190
+ deprecate :only_missing, nil, 2016, 10
1083
1191
 
1084
1192
  # Returns a Vector with only numerical data. Missing data is included
1085
1193
  # but non-Numeric objects are excluded. Preserves index.
1086
1194
  def only_numerics
1087
- numeric_indexes = []
1088
-
1089
- each_with_index do |v, i|
1090
- numeric_indexes << i if v.is_a?(Numeric) || @missing_values.key?(v)
1091
- end
1195
+ numeric_indexes =
1196
+ each_with_index
1197
+ .select { |v, _i| v.is_a?(Numeric) || v.nil? }
1198
+ .map(&:last)
1092
1199
 
1093
1200
  self[*numeric_indexes]
1094
1201
  end
1095
1202
 
1203
+ DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/
1204
+
1096
1205
  # Returns the database type for the vector, according to its content
1097
1206
  def db_type
1098
1207
  # first, detect any character not number
1099
- if @data.find { |v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/ } ||
1100
- @data.find { |v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/ }
1101
-
1102
- return 'DATE'
1103
- elsif @data.find { |v| v.to_s=~/[^0-9e.-]/ }
1104
- return 'VARCHAR (255)'
1105
- elsif @data.find { |v| v.to_s=~/\./ }
1106
- return 'DOUBLE'
1208
+ case
1209
+ when @data.any? { |v| v.to_s =~ DATE_REGEXP }
1210
+ 'DATE'
1211
+ when @data.any? { |v| v.to_s =~ /[^0-9e.-]/ }
1212
+ 'VARCHAR (255)'
1213
+ when @data.any? { |v| v.to_s =~ /\./ }
1214
+ 'DOUBLE'
1107
1215
  else
1108
- return 'INTEGER'
1216
+ 'INTEGER'
1109
1217
  end
1110
1218
  end
1111
1219
 
1112
1220
  # Copies the structure of the vector (i.e the index, size, etc.) and fills all
1113
1221
  # all values with nils.
1114
1222
  def clone_structure
1115
- Daru::Vector.new(([nil]*@size), name: @name, metadata: @metadata.dup, index: @index.dup)
1223
+ Daru::Vector.new(([nil]*size), name: @name, index: @index.dup)
1116
1224
  end
1117
1225
 
1118
1226
  # Save the vector to a file
@@ -1129,38 +1237,156 @@ module Daru
1129
1237
  data: @data.to_a,
1130
1238
  dtype: @dtype,
1131
1239
  name: @name,
1132
- metadata: @metadata,
1133
- index: @index,
1134
- missing_values: @missing_values
1240
+ index: @index
1135
1241
  )
1136
1242
  end
1137
1243
 
1138
- def self._load(data) # :nodoc:
1139
- h = Marshal.load(data)
1140
- Daru::Vector.new(h[:data],
1141
- index: h[:index],
1142
- name: h[:name], metadata: h[:metadata],
1143
- dtype: h[:dtype], missing_values: h[:missing_values])
1144
- end
1145
-
1244
+ # :nocov:
1146
1245
  def daru_vector(*)
1147
1246
  self
1148
1247
  end
1248
+ # :nocov:
1149
1249
 
1150
1250
  alias :dv :daru_vector
1151
1251
 
1252
+ # Converts a non category type vector to category type vector.
1253
+ # @param [Hash] opts options to convert to category
1254
+ # @option opts [true, false] :ordered Specify if vector is ordered or not.
1255
+ # If it is ordered, it can be sorted and min, max like functions would work
1256
+ # @option opts [Array] :categories set categories in the specified order
1257
+ # @return [Daru::Vector] vector with type category
1258
+ def to_category opts={}
1259
+ dv = Daru::Vector.new to_a, type: :category, name: @name, index: @index
1260
+ dv.ordered = opts[:ordered] || false
1261
+ dv.categories = opts[:categories] if opts[:categories]
1262
+ dv
1263
+ end
1264
+
1152
1265
  def method_missing(name, *args, &block)
1266
+ # FIXME: it is shamefully fragile. Should be either made stronger
1267
+ # (string/symbol dychotomy, informative errors) or removed totally. - zverok
1153
1268
  if name =~ /(.+)\=/
1154
- self[name] = args[0]
1269
+ self[$1.to_sym] = args[0]
1155
1270
  elsif has_index?(name)
1156
1271
  self[name]
1157
1272
  else
1158
- super(name, *args, &block)
1273
+ super
1274
+ end
1275
+ end
1276
+
1277
+ def respond_to_missing?(name, include_private=false)
1278
+ name.to_s.end_with?('=') || has_index?(name) || super
1279
+ end
1280
+
1281
+ # Partition a numeric variable into categories.
1282
+ # @param [Array<Numeric>] partitions an array whose consecutive elements
1283
+ # provide intervals for categories
1284
+ # @param [Hash] opts options to cut the partition
1285
+ # @option opts [:left, :right] :close_at specifies whether the interval closes at
1286
+ # the right side of left side
1287
+ # @option opts [Array] :labels names of the categories
1288
+ # @return [Daru::Vector] numeric variable converted to categorical variable
1289
+ # @example
1290
+ # heights = Daru::Vector.new [30, 35, 32, 50, 42, 51]
1291
+ # height_cat = heights.cut [30, 40, 50, 60], labels=['low', 'medium', 'high']
1292
+ # # => #<Daru::Vector(6)>
1293
+ # # 0 low
1294
+ # # 1 low
1295
+ # # 2 low
1296
+ # # 3 high
1297
+ # # 4 medium
1298
+ # # 5 high
1299
+ def cut partitions, opts={}
1300
+ close_at, labels = opts[:close_at] || :right, opts[:labels]
1301
+ partitions = partitions.to_a
1302
+ values = to_a.map { |val| cut_find_category partitions, val, close_at }
1303
+ cats = cut_categories(partitions, close_at)
1304
+
1305
+ dv = Daru::Vector.new values,
1306
+ index: @index,
1307
+ type: :category,
1308
+ categories: cats
1309
+
1310
+ # Rename categories if new labels provided
1311
+ if labels
1312
+ dv.rename_categories Hash[cats.zip(labels)]
1313
+ else
1314
+ dv
1315
+ end
1316
+ end
1317
+
1318
+ def positions(*values)
1319
+ case values
1320
+ when [nil]
1321
+ nil_positions
1322
+ when [Float::NAN]
1323
+ nan_positions
1324
+ when [nil, Float::NAN], [Float::NAN, nil]
1325
+ nil_positions + nan_positions
1326
+ else
1327
+ size.times.select { |i| include_with_nan? values, @data[i] }
1159
1328
  end
1160
1329
  end
1161
1330
 
1162
1331
  private
1163
1332
 
1333
+ def nil_positions
1334
+ @nil_positions ||
1335
+ @nil_positions = size.times.select { |i| @data[i].nil? }
1336
+ end
1337
+
1338
+ def nan_positions
1339
+ @nan_positions ||
1340
+ @nan_positions = size.times.select do |i|
1341
+ @data[i].respond_to?(:nan?) && @data[i].nan?
1342
+ end
1343
+ end
1344
+
1345
+ def initialize_vector source, opts
1346
+ index, source = parse_source(source, opts)
1347
+ set_name opts[:name]
1348
+
1349
+ @data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
1350
+ @index = Index.coerce(index || @data.size)
1351
+
1352
+ guard_sizes!
1353
+
1354
+ @possibly_changed_type = true
1355
+ # Include plotting functionality
1356
+ self.plotting_library = Daru.plotting_library
1357
+ end
1358
+
1359
+ def parse_source source, opts
1360
+ if source.is_a?(Hash)
1361
+ [source.keys, source.values]
1362
+ else
1363
+ [opts[:index], source || []]
1364
+ end
1365
+ end
1366
+
1367
+ def guard_sizes!
1368
+ if @index.size > @data.size
1369
+ cast(dtype: :array) # NM with nils seg faults
1370
+ @data.fill(nil, @data.size...@index.size)
1371
+ elsif @index.size < @data.size
1372
+ raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
1373
+ end
1374
+ end
1375
+
1376
+ def guard_type_check value
1377
+ @possibly_changed_type = true \
1378
+ if object? && (value.nil? || value.is_a?(Numeric)) ||
1379
+ numeric? && !value.is_a?(Numeric) && !value.nil?
1380
+ end
1381
+
1382
+ def split_value key, v
1383
+ case
1384
+ when v.nil? then nil
1385
+ when v.include?(key) then 1
1386
+ else 0
1387
+ end
1388
+ end
1389
+
1164
1390
  # For an array or hash of estimators methods, returns
1165
1391
  # an array with three elements
1166
1392
  # 1.- A hash with estimators names as keys and lambdas as values
@@ -1180,18 +1406,6 @@ module Daru
1180
1406
  [h_est, h_est.keys, bss]
1181
1407
  end
1182
1408
 
1183
- def keep? a, b, order
1184
- eval = yield(a, b)
1185
- if order == :ascending
1186
- return true if eval == -1
1187
- return false if eval == 1
1188
- elsif order == :descending
1189
- return false if eval == -1
1190
- return true if eval == 1
1191
- end
1192
- false
1193
- end
1194
-
1195
1409
  # Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
1196
1410
  # @dtype variable is set and the underlying data type of vector changed.
1197
1411
  def cast_vector_to dtype, source=nil, nm_dtype=nil
@@ -1203,25 +1417,13 @@ module Daru
1203
1417
  when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
1204
1418
  when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
1205
1419
  when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
1206
- else raise "Unknown dtype #{dtype}"
1420
+ else raise ArgumentError, "Unknown dtype #{dtype}"
1207
1421
  end
1208
1422
 
1209
1423
  @dtype = dtype || :array
1210
1424
  new_vector
1211
1425
  end
1212
1426
 
1213
- def index_for index
1214
- if @index.include?(index)
1215
- @index[index]
1216
- elsif index.is_a?(Numeric)
1217
- index
1218
- end
1219
- end
1220
-
1221
- def set_size
1222
- @size = @data.size
1223
- end
1224
-
1225
1427
  def set_name name # rubocop:disable Style/AccessorMethodName
1226
1428
  @name =
1227
1429
  if name.is_a?(Numeric) then name
@@ -1232,38 +1434,109 @@ module Daru
1232
1434
  end
1233
1435
  end
1234
1436
 
1235
- def set_missing_positions
1236
- @missing_positions = []
1237
- @index.each do |e|
1238
- @missing_positions << e if @missing_values.key?(self[e])
1437
+ # Raises IndexError when one of the positions is an invalid position
1438
+ def validate_positions *positions
1439
+ positions = [positions] if positions.is_a? Integer
1440
+ positions.each do |pos|
1441
+ raise IndexError, "#{pos} is not a valid position." if pos >= size
1239
1442
  end
1240
1443
  end
1241
1444
 
1242
- def try_create_index potential_index
1243
- if potential_index.is_a?(Daru::MultiIndex) || potential_index.is_a?(Daru::Index)
1244
- potential_index
1445
+ # coerce ranges, integers and array in appropriate ways
1446
+ def coerce_positions *positions
1447
+ if positions.size == 1
1448
+ case positions.first
1449
+ when Integer
1450
+ positions.first
1451
+ when Range
1452
+ size.times.to_a[positions.first]
1453
+ else
1454
+ raise ArgumentError, 'Unkown position type.'
1455
+ end
1245
1456
  else
1246
- Daru::Index.new(potential_index)
1457
+ positions
1247
1458
  end
1248
1459
  end
1249
1460
 
1250
- def element_from_numeric_index location
1251
- pos = index_for location
1252
- pos ? @data[pos] : nil
1461
+ # Helper method for []=.
1462
+ # Assigs existing index to another value
1463
+ def modify_vector(indexes, val)
1464
+ positions = @index.pos(*indexes)
1465
+
1466
+ if positions.is_a? Numeric
1467
+ @data[positions] = val
1468
+ else
1469
+ positions.each { |pos| @data[pos] = val }
1470
+ end
1253
1471
  end
1254
1472
 
1255
- # Setup missing_values. The missing_values instance variable is set
1256
- # as a Hash for faster lookup times.
1257
- def set_missing_values values_arry # rubocop:disable Style/AccessorMethodName
1258
- @missing_values = {}
1259
- @missing_values[nil] = 0
1260
- if values_arry
1261
- values_arry.each do |e|
1262
- # If dtype is :gsl then missing values have to be converted to float
1263
- e = e.to_f if dtype == :gsl && e.is_a?(Numeric)
1264
- @missing_values[e] = 0
1473
+ # Helper method for []=.
1474
+ # Add a new index and assign it value
1475
+ def insert_vector(indexes, val)
1476
+ new_index = @index.add(*indexes)
1477
+ # May be create +=
1478
+ (new_index.size - @index.size).times { @data << val }
1479
+ @index = new_index
1480
+ end
1481
+
1482
+ # Works similar to #[]= but also insert the vector in case index is not valid
1483
+ # It is there only to be accessed by Daru::DataFrame and not meant for user.
1484
+ def set indexes, val
1485
+ cast(dtype: :array) if val.nil? && dtype != :array
1486
+ guard_type_check(val)
1487
+
1488
+ if @index.valid?(*indexes)
1489
+ modify_vector(indexes, val)
1490
+ else
1491
+ insert_vector(indexes, val)
1492
+ end
1493
+
1494
+ update_position_cache
1495
+ end
1496
+
1497
+ def cut_find_category partitions, val, close_at
1498
+ case close_at
1499
+ when :right
1500
+ right_index = partitions.index { |i| i > val }
1501
+ raise ArgumentError, 'Invalid partition' if right_index.nil?
1502
+ left_index = right_index - 1
1503
+ "#{partitions[left_index]}-#{partitions[right_index]-1}"
1504
+ when :left
1505
+ right_index = partitions.index { |i| i >= val }
1506
+ raise ArgumentError, 'Invalid partition' if right_index.nil?
1507
+ left_index = right_index - 1
1508
+ "#{partitions[left_index]+1}-#{partitions[right_index]}"
1509
+ else
1510
+ raise ArgumentError, "Invalid parameter #{close_at} to close_at."
1511
+ end
1512
+ end
1513
+
1514
+ def cut_categories partitions, close_at
1515
+ case close_at
1516
+ when :right
1517
+ Array.new(partitions.size-1) do |left_index|
1518
+ "#{partitions[left_index]}-#{partitions[left_index+1]-1}"
1519
+ end
1520
+ when :left
1521
+ Array.new(partitions.size-1) do |left_index|
1522
+ "#{partitions[left_index]+1}-#{partitions[left_index+1]}"
1265
1523
  end
1266
1524
  end
1267
1525
  end
1526
+
1527
+ def include_with_nan? array, value
1528
+ # Returns true if value is included in array.
1529
+ # Similar to include? but also works if value is Float::NAN
1530
+ if value.respond_to?(:nan?) && value.nan?
1531
+ array.any? { |i| i.respond_to?(:nan?) && i.nan? }
1532
+ else
1533
+ array.include? value
1534
+ end
1535
+ end
1536
+
1537
+ def update_position_cache
1538
+ @nil_positions = nil
1539
+ @nan_positions = nil
1540
+ end
1268
1541
  end
1269
1542
  end