daru 0.1.3.1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
@@ -0,0 +1,46 @@
|
|
1
|
+
module Daru
|
2
|
+
module Plotting
|
3
|
+
module Vector
|
4
|
+
module NyaplotLibrary
|
5
|
+
# Plots a Vector with Nyaplot on IRuby using the given options. Yields the
|
6
|
+
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
7
|
+
# to the block, which can be used for setting various options as per the
|
8
|
+
# Nyaplot API.
|
9
|
+
#
|
10
|
+
# == Options
|
11
|
+
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
12
|
+
#
|
13
|
+
# == Usage
|
14
|
+
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
15
|
+
# vector.plot(type: :bar) do |plot|
|
16
|
+
# plot.title "My first plot"
|
17
|
+
# plot.width 1200
|
18
|
+
# end
|
19
|
+
def plot opts={}
|
20
|
+
options = {
|
21
|
+
type: :scatter
|
22
|
+
}.merge(opts)
|
23
|
+
|
24
|
+
x_axis = options[:type] == :scatter ? Array.new(size) { |i| i } : @index.to_a
|
25
|
+
plot = Nyaplot::Plot.new
|
26
|
+
diagram = create_diagram plot, options[:type], x_axis
|
27
|
+
|
28
|
+
yield plot, diagram if block_given?
|
29
|
+
|
30
|
+
plot.show
|
31
|
+
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def create_diagram plot, type, x_axis
|
36
|
+
case type
|
37
|
+
when :box, :histogram
|
38
|
+
plot.add(type, @data.to_a)
|
39
|
+
else
|
40
|
+
plot.add(type, x_axis, @data.to_a)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/daru/vector.rb
CHANGED
@@ -1,16 +1,98 @@
|
|
1
1
|
require 'daru/maths/arithmetic/vector.rb'
|
2
2
|
require 'daru/maths/statistics/vector.rb'
|
3
|
-
require 'daru/plotting/
|
3
|
+
require 'daru/plotting/gruff.rb'
|
4
|
+
require 'daru/plotting/nyaplot.rb'
|
4
5
|
require 'daru/accessors/array_wrapper.rb'
|
5
6
|
require 'daru/accessors/nmatrix_wrapper.rb'
|
6
7
|
require 'daru/accessors/gsl_wrapper.rb'
|
8
|
+
require 'daru/category.rb'
|
7
9
|
|
8
10
|
module Daru
|
9
|
-
class Vector
|
11
|
+
class Vector # rubocop:disable Metrics/ClassLength
|
10
12
|
include Enumerable
|
11
13
|
include Daru::Maths::Arithmetic::Vector
|
12
14
|
include Daru::Maths::Statistics::Vector
|
13
|
-
|
15
|
+
extend Gem::Deprecate
|
16
|
+
|
17
|
+
class << self
|
18
|
+
# Create a new vector by specifying the size and an optional value
# and block to generate values.
#
# == Description
#
# The *new_with_size* class method lets you create a Daru::Vector
# by specifying the size as the argument. The optional block, if
# supplied, is run once for populating each element in the Vector.
#
# The result of each run of the block is the value that is ultimately
# assigned to that position in the Vector. When no block is given,
# every position is filled with the :value option (nil if absent).
#
# == Options
# :value
# All the rest like .new
def new_with_size n, opts={}, &block
  # Work on a copy so the caller's options hash keeps its :value entry
  # (and so a frozen hash can be passed in).
  opts = opts.dup
  value = opts.delete :value
  block ||= ->(_) { value }
  Daru::Vector.new Array.new(n, &block), opts
end
|
38
|
+
|
39
|
+
# Create a vector using (almost) any object
|
40
|
+
# * Array: flattened
|
41
|
+
# * Range: transformed using to_a
|
42
|
+
# * Daru::Vector
|
43
|
+
# * Numeric and string values
|
44
|
+
#
|
45
|
+
# == Description
|
46
|
+
#
|
47
|
+
# The `Vector.[]` class method creates a vector from almost any
|
48
|
+
# object that has a `#to_a` method defined on it. It is similar
|
49
|
+
# to R's `c` method.
|
50
|
+
#
|
51
|
+
# == Usage
|
52
|
+
#
|
53
|
+
# a = Daru::Vector[1,2,3,4,6..10]
|
54
|
+
# #=>
|
55
|
+
# # <Daru::Vector:99448510 @name = nil @size = 9 >
|
56
|
+
# # nil
|
57
|
+
# # 0 1
|
58
|
+
# # 1 2
|
59
|
+
# # 2 3
|
60
|
+
# # 3 4
|
61
|
+
# # 4 6
|
62
|
+
# # 5 7
|
63
|
+
# # 6 8
|
64
|
+
# # 7 9
|
65
|
+
# # 8 10
|
66
|
+
def [](*indexes)
|
67
|
+
values = indexes.map do |a|
|
68
|
+
a.respond_to?(:to_a) ? a.to_a : a
|
69
|
+
end.flatten
|
70
|
+
Daru::Vector.new(values)
|
71
|
+
end
|
72
|
+
|
73
|
+
def _load(data) # :nodoc:
|
74
|
+
h = Marshal.load(data)
|
75
|
+
Daru::Vector.new(h[:data],
|
76
|
+
index: h[:index],
|
77
|
+
name: h[:name],
|
78
|
+
dtype: h[:dtype], missing_values: h[:missing_values])
|
79
|
+
end
|
80
|
+
|
81
|
+
def coerce(data, options={})
|
82
|
+
case data
|
83
|
+
when Daru::Vector
|
84
|
+
data
|
85
|
+
when Array, Hash
|
86
|
+
new(data, options)
|
87
|
+
else
|
88
|
+
raise ArgumentError, "Can't coerce #{data.class} to #{self}"
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def size
|
94
|
+
@data.size
|
95
|
+
end
|
14
96
|
|
15
97
|
def each(&block)
|
16
98
|
return to_enum(:each) unless block_given?
|
@@ -26,17 +108,17 @@ module Daru
|
|
26
108
|
self
|
27
109
|
end
|
28
110
|
|
29
|
-
def each_with_index
|
111
|
+
def each_with_index &block
|
30
112
|
return to_enum(:each_with_index) unless block_given?
|
31
113
|
|
32
|
-
@index.each
|
114
|
+
@data.to_a.zip(@index.to_a).each(&block)
|
115
|
+
|
33
116
|
self
|
34
117
|
end
|
35
118
|
|
36
119
|
def map!(&block)
|
37
120
|
return to_enum(:map!) unless block_given?
|
38
121
|
@data.map!(&block)
|
39
|
-
update
|
40
122
|
self
|
41
123
|
end
|
42
124
|
|
@@ -44,8 +126,6 @@ module Daru
|
|
44
126
|
attr_reader :name
|
45
127
|
# The row index. Can be either Daru::Index or Daru::MultiIndex.
|
46
128
|
attr_reader :index
|
47
|
-
# The total number of elements of the vector.
|
48
|
-
attr_reader :size
|
49
129
|
# The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
|
50
130
|
attr_reader :dtype
|
51
131
|
# If the dtype is :nmatrix, this attribute represents the data type of the
|
@@ -54,13 +134,16 @@ module Daru
|
|
54
134
|
attr_reader :nm_dtype
|
55
135
|
# An Array of the positions in the vector that are being treated as 'missing'.
|
56
136
|
attr_reader :missing_positions
|
137
|
+
deprecate :missing_positions, :indexes, 2016, 10
|
57
138
|
# Store a hash of labels for values. Supplementary only. Recommend using index
|
58
139
|
# for proper usage.
|
59
140
|
attr_accessor :labels
|
60
141
|
# Store vector data in an array
|
61
142
|
attr_reader :data
|
62
|
-
#
|
63
|
-
|
143
|
+
# Plotting library being used for this vector
|
144
|
+
attr_reader :plotting_library
|
145
|
+
# TODO: Make private.
|
146
|
+
attr_reader :nil_positions, :nan_positions
|
64
147
|
|
65
148
|
# Create a Vector object.
|
66
149
|
#
|
@@ -93,102 +176,27 @@ module Daru
|
|
93
176
|
# vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
|
94
177
|
# vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
|
95
178
|
def initialize source, opts={}
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
source
|
179
|
+
if opts[:type] == :category
|
180
|
+
# Initialize category type vector
|
181
|
+
extend Daru::Category
|
182
|
+
initialize_category source, opts
|
100
183
|
else
|
101
|
-
|
102
|
-
source
|
184
|
+
# Initialize non-category type vector
|
185
|
+
initialize_vector source, opts
|
103
186
|
end
|
104
|
-
name = opts[:name]
|
105
|
-
set_name name
|
106
|
-
|
107
|
-
@metadata = opts[:metadata] || {}
|
108
|
-
|
109
|
-
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
110
|
-
@index = try_create_index(index || @data.size)
|
111
|
-
|
112
|
-
if @index.size > @data.size
|
113
|
-
cast(dtype: :array) # NM with nils seg faults
|
114
|
-
(@index.size - @data.size).times { @data << nil }
|
115
|
-
elsif @index.size < @data.size
|
116
|
-
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
117
|
-
end
|
118
|
-
|
119
|
-
@possibly_changed_type = true
|
120
|
-
set_missing_values opts[:missing_values]
|
121
|
-
set_missing_positions
|
122
|
-
set_size
|
123
187
|
end
|
124
188
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
# supplied, is run once for populating each element in the Vector.
|
133
|
-
#
|
134
|
-
# The result of each run of the block is the value that is ultimately
|
135
|
-
# assigned to that position in the Vector.
|
136
|
-
#
|
137
|
-
# == Options
|
138
|
-
# :value
|
139
|
-
# All the rest like .new
|
140
|
-
def self.new_with_size n, opts={}, &block
|
141
|
-
value = opts[:value]
|
142
|
-
opts.delete :value
|
143
|
-
if block
|
144
|
-
Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
|
189
|
+
# Select the plotting backend for this vector.
#
# Stores +lib+ in @plotting_library and, when Daru reports the backend as
# available (Daru.has_gruff? / Daru.has_nyaplot?), extends this vector with
# the matching Daru::Plotting::Vector::<Lib>Library module.
#
# @param lib [Symbol] :gruff or :nyaplot
# @raise [ArgumentError] if +lib+ is any other value
def plotting_library= lib
  case lib
  when :gruff, :nyaplot
    @plotting_library = lib
    # Only mix in the backend when Daru reports it as available.
    if Daru.send("has_#{lib}?".to_sym)
      extend Module.const_get(
        "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
      )
    end
  else
    # The exception class was previously misspelled (`ArguementError`), so
    # callers got a NameError instead of the intended ArgumentError.
    raise ArgumentError, "Plotting library #{lib} not supported. "\
      'Supported libraries are :nyaplot and :gruff'
  end
end
|
193
201
|
|
194
202
|
# Get one or more elements with specified index or a range.
|
@@ -203,19 +211,63 @@ module Daru
|
|
203
211
|
# # For vectors employing hierarchical multi index
|
204
212
|
#
|
205
213
|
def [](*input_indexes)
|
206
|
-
# Get
|
207
|
-
|
214
|
+
# Get array of positions indexes
|
215
|
+
positions = @index.pos(*input_indexes)
|
208
216
|
|
209
217
|
# If one object is asked return it
|
210
|
-
return @data[
|
218
|
+
return @data[positions] if positions.is_a? Numeric
|
211
219
|
|
212
|
-
# Form a new Vector using indexes
|
220
|
+
# Form a new Vector using positional indexes
|
213
221
|
Daru::Vector.new(
|
214
|
-
|
215
|
-
name: @name,
|
222
|
+
positions.map { |loc| @data[loc] },
|
223
|
+
name: @name,
|
224
|
+
index: @index.subset(*input_indexes), dtype: @dtype
|
216
225
|
)
|
217
226
|
end
|
218
227
|
|
228
|
+
# Returns vector of values given positional values
|
229
|
+
# @param [Array<object>] *positions positional values
|
230
|
+
# @return [object] vector
|
231
|
+
# @example
|
232
|
+
# dv = Daru::Vector.new 'a'..'e'
|
233
|
+
# dv.at 0, 1, 2
|
234
|
+
# # => #<Daru::Vector(3)>
|
235
|
+
# # 0 a
|
236
|
+
# # 1 b
|
237
|
+
# # 2 c
|
238
|
+
def at *positions
|
239
|
+
# to be used to form index
|
240
|
+
original_positions = positions
|
241
|
+
positions = coerce_positions(*positions)
|
242
|
+
validate_positions(*positions)
|
243
|
+
|
244
|
+
if positions.is_a? Integer
|
245
|
+
@data[positions]
|
246
|
+
else
|
247
|
+
values = positions.map { |pos| @data[pos] }
|
248
|
+
Daru::Vector.new values, index: @index.at(*original_positions), dtype: dtype
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
# Change value at given positions
|
253
|
+
# @param [Array<object>] *positions positional values
|
254
|
+
# @param [object] val value to assign
|
255
|
+
# @example
|
256
|
+
# dv = Daru::Vector.new 'a'..'e'
|
257
|
+
# dv.set_at [0, 1], 'x'
|
258
|
+
# dv
|
259
|
+
# # => #<Daru::Vector(5)>
|
260
|
+
# # 0 x
|
261
|
+
# # 1 x
|
262
|
+
# # 2 c
|
263
|
+
# # 3 d
|
264
|
+
# # 4 e
|
265
|
+
def set_at positions, val
|
266
|
+
validate_positions(*positions)
|
267
|
+
positions.map { |pos| @data[pos] = val }
|
268
|
+
update_position_cache
|
269
|
+
end
|
270
|
+
|
219
271
|
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
220
272
|
# and assign an element at that place in the Daru::Vector.
|
221
273
|
#
|
@@ -229,57 +281,14 @@ module Daru
|
|
229
281
|
# # a 999
|
230
282
|
# # b 2
|
231
283
|
# # c 3
|
232
|
-
def []=(*
|
233
|
-
cast(dtype: :array) if
|
234
|
-
|
235
|
-
@possibly_changed_type = true if @type == :object && (value.nil? ||
|
236
|
-
value.is_a?(Numeric))
|
237
|
-
@possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
|
238
|
-
!value.nil?)
|
239
|
-
|
240
|
-
pos = @index[*location]
|
241
|
-
|
242
|
-
if pos.is_a?(Numeric)
|
243
|
-
@data[pos] = value
|
244
|
-
else
|
245
|
-
begin
|
246
|
-
pos.each { |tuple| self[tuple] = value }
|
247
|
-
rescue NoMethodError
|
248
|
-
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
249
|
-
end
|
250
|
-
end
|
251
|
-
|
252
|
-
set_size
|
253
|
-
set_missing_positions unless Daru.lazy_update
|
254
|
-
end
|
284
|
+
def []=(*indexes, val)
|
285
|
+
cast(dtype: :array) if val.nil? && dtype != :array
|
255
286
|
|
256
|
-
|
257
|
-
# type. To set missing values see the missing_values= method.
|
258
|
-
def missing_values
|
259
|
-
@missing_values.keys
|
260
|
-
end
|
287
|
+
guard_type_check(val)
|
261
288
|
|
262
|
-
|
263
|
-
#
|
264
|
-
# == Usage
|
265
|
-
#
|
266
|
-
# v = Daru::Vector.new [1,2,3,4,5]
|
267
|
-
# v.missing_values = [3]
|
268
|
-
# v.update
|
269
|
-
# v.missing_positions
|
270
|
-
# #=> [2]
|
271
|
-
def missing_values= values
|
272
|
-
set_missing_values values
|
273
|
-
set_missing_positions unless Daru.lazy_update
|
274
|
-
end
|
289
|
+
modify_vector(indexes, val)
|
275
290
|
|
276
|
-
|
277
|
-
# after assingment/deletion etc. are complete. This is provided so that
|
278
|
-
# time is not wasted in creating the metadata for the vector each time
|
279
|
-
# assignment/deletion of elements is done. Updating data this way is called
|
280
|
-
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
281
|
-
def update
|
282
|
-
Daru.lazy_update and set_missing_positions
|
291
|
+
update_position_cache
|
283
292
|
end
|
284
293
|
|
285
294
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -287,7 +296,7 @@ module Daru
|
|
287
296
|
def == other
|
288
297
|
case other
|
289
298
|
when Daru::Vector
|
290
|
-
@index == other.index &&
|
299
|
+
@index == other.index && size == other.size &&
|
291
300
|
@index.all? { |index| self[index] == other[index] }
|
292
301
|
else
|
293
302
|
super
|
@@ -405,8 +414,8 @@ module Daru
|
|
405
414
|
# # 11 5
|
406
415
|
# # 13 5
|
407
416
|
# # 15 1
|
408
|
-
def where
|
409
|
-
Daru::Core::Query.vector_where
|
417
|
+
def where bool_array
|
418
|
+
Daru::Core::Query.vector_where self, bool_array
|
410
419
|
end
|
411
420
|
|
412
421
|
def head q=10
|
@@ -414,18 +423,41 @@ module Daru
|
|
414
423
|
end
|
415
424
|
|
416
425
|
def tail q=10
|
417
|
-
|
426
|
+
start = [size - q, 0].max
|
427
|
+
self[start..(size-1)]
|
418
428
|
end
|
419
429
|
|
420
430
|
def empty?
|
421
431
|
@index.empty?
|
422
432
|
end
|
423
433
|
|
434
|
+
def numeric?
|
435
|
+
type == :numeric
|
436
|
+
end
|
437
|
+
|
438
|
+
def object?
|
439
|
+
type == :object
|
440
|
+
end
|
441
|
+
|
424
442
|
# Reports whether missing data is present in the Vector.
|
425
443
|
def has_missing_data?
|
426
|
-
!
|
444
|
+
!indexes(*Daru::MISSING_VALUES).empty?
|
427
445
|
end
|
428
446
|
alias :flawed? :has_missing_data?
|
447
|
+
deprecate :has_missing_data?, :include_values?, 2016, 10
|
448
|
+
deprecate :flawed?, :include_values?, 2016, 10
|
449
|
+
|
450
|
+
# Check if any one of mentioned values occur in the vector
|
451
|
+
# @param [Array] *values values to check for
|
452
|
+
# @return [true, false] returns true if any one of specified values
|
453
|
+
# occur in the vector
|
454
|
+
# @example
|
455
|
+
# dv = Daru::Vector.new [1, 2, 3, 4, nil]
|
456
|
+
# dv.include_values? nil, Float::NAN
|
457
|
+
# # => true
|
458
|
+
def include_values?(*values)
|
459
|
+
values.any? { |v| include_with_nan? @data, v }
|
460
|
+
end
|
429
461
|
|
430
462
|
# Append an element to the vector by specifying the element and index
|
431
463
|
def concat element, index
|
@@ -434,8 +466,7 @@ module Daru
|
|
434
466
|
@index |= [index]
|
435
467
|
@data[@index[index]] = element
|
436
468
|
|
437
|
-
|
438
|
-
set_missing_positions unless Daru.lazy_update
|
469
|
+
update_position_cache
|
439
470
|
end
|
440
471
|
alias :push :concat
|
441
472
|
alias :<< :concat
|
@@ -463,8 +494,7 @@ module Daru
|
|
463
494
|
@data.delete_at @index[index]
|
464
495
|
@index = Daru::Index.new(@index.to_a - [index])
|
465
496
|
|
466
|
-
|
467
|
-
set_missing_positions unless Daru.lazy_update
|
497
|
+
update_position_cache
|
468
498
|
end
|
469
499
|
|
470
500
|
# The type of data contained in the vector. Can be :object or :numeric. If
|
@@ -489,6 +519,16 @@ module Daru
|
|
489
519
|
@type
|
490
520
|
end
|
491
521
|
|
522
|
+
# Tells if vector is categorical or not.
|
523
|
+
# @return [true, false] true if vector is of type category, false otherwise
|
524
|
+
# @example
|
525
|
+
# dv = Daru::Vector.new [1, 2, 3], type: :category
|
526
|
+
# dv.category?
|
527
|
+
# # => true
|
528
|
+
def category?
|
529
|
+
type == :category
|
530
|
+
end
|
531
|
+
|
492
532
|
# Get index of element
|
493
533
|
def index_of element
|
494
534
|
case dtype
|
@@ -500,11 +540,9 @@ module Daru
|
|
500
540
|
# Keep only unique elements of the vector along with their indexes.
|
501
541
|
def uniq
|
502
542
|
uniq_vector = @data.uniq
|
503
|
-
new_index = uniq_vector.
|
504
|
-
acc << index_of(element)
|
505
|
-
end
|
543
|
+
new_index = uniq_vector.map { |element| index_of(element) }
|
506
544
|
|
507
|
-
Daru::Vector.new uniq_vector, name: @name,
|
545
|
+
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
508
546
|
end
|
509
547
|
|
510
548
|
def any? &block
|
@@ -531,47 +569,46 @@ module Daru
|
|
531
569
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
532
570
|
# # Say you want to sort these strings by length.
|
533
571
|
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
534
|
-
def sort opts={}
|
535
|
-
opts = {
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
vector_index = @data.each_with_index
|
540
|
-
vector_index =
|
541
|
-
if block_given?
|
542
|
-
vector_index.sort { |a,b| yield(a[0], b[0]) }
|
543
|
-
else
|
544
|
-
vector_index.sort { |(av, ai), (bv, bi)|
|
545
|
-
if !av.nil? && !bv.nil?
|
546
|
-
av <=> bv
|
547
|
-
elsif av.nil? && bv.nil?
|
548
|
-
ai <=> bi
|
549
|
-
elsif av.nil?
|
550
|
-
opts[:ascending] ? -1 : 1
|
551
|
-
else
|
552
|
-
opts[:ascending] ? 1 : -1
|
553
|
-
end
|
554
|
-
}
|
555
|
-
end
|
556
|
-
vector_index.reverse! unless opts[:ascending]
|
572
|
+
def sort opts={}, &block
|
573
|
+
opts = {ascending: true}.merge(opts)
|
574
|
+
|
575
|
+
vector_index = resort_index(@data.each_with_index, opts, &block)
|
557
576
|
vector, index = vector_index.transpose
|
558
|
-
old_index = @index.to_a
|
559
|
-
index = index.map { |i| old_index[i] }
|
560
577
|
|
561
|
-
|
578
|
+
index = @index.reorder index
|
579
|
+
|
580
|
+
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
581
|
+
end
|
582
|
+
|
583
|
+
DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
|
584
|
+
case
|
585
|
+
when lv.nil? && rv.nil?
|
586
|
+
li <=> ri
|
587
|
+
when lv.nil?
|
588
|
+
-1
|
589
|
+
when rv.nil?
|
590
|
+
1
|
591
|
+
else
|
592
|
+
lv <=> rv
|
593
|
+
end
|
594
|
+
}
|
595
|
+
|
596
|
+
def resort_index vector_index, opts
|
597
|
+
if block_given?
|
598
|
+
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
599
|
+
else
|
600
|
+
vector_index.sort(&DEFAULT_SORTER)
|
601
|
+
end
|
602
|
+
.tap { |res| res.reverse! unless opts[:ascending] }
|
562
603
|
end
|
563
604
|
|
564
605
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
565
606
|
# Non-destructive.
|
607
|
+
# :nocov:
|
566
608
|
def sorted_data &block
|
567
609
|
@data.to_a.sort(&block)
|
568
610
|
end
|
569
|
-
|
570
|
-
# Returns *true* if the value passed is actually exists or is not marked as
|
571
|
-
# a *missing value*.
|
572
|
-
def exists? value
|
573
|
-
!@missing_values.key?(self[index_of(value)])
|
574
|
-
end
|
611
|
+
# :nocov:
|
575
612
|
|
576
613
|
# Like map, but returns a Daru::Vector with the returned values.
|
577
614
|
def recode dt=nil, &block
|
@@ -593,19 +630,12 @@ module Daru
|
|
593
630
|
def delete_if
|
594
631
|
return to_enum(:delete_if) unless block_given?
|
595
632
|
|
596
|
-
keep_e =
|
597
|
-
keep_i = []
|
598
|
-
each_with_index do |n, i|
|
599
|
-
unless yield(n)
|
600
|
-
keep_e << n
|
601
|
-
keep_i << i
|
602
|
-
end
|
603
|
-
end
|
633
|
+
keep_e, keep_i = each_with_index.select { |n, _i| !yield(n) }.transpose
|
604
634
|
|
605
635
|
@data = cast_vector_to @dtype, keep_e
|
606
636
|
@index = Daru::Index.new(keep_i)
|
607
|
-
|
608
|
-
|
637
|
+
|
638
|
+
update_position_cache
|
609
639
|
|
610
640
|
self
|
611
641
|
end
|
@@ -614,32 +644,16 @@ module Daru
|
|
614
644
|
def keep_if
|
615
645
|
return to_enum(:keep_if) unless block_given?
|
616
646
|
|
617
|
-
|
618
|
-
keep_i = []
|
619
|
-
each_with_index do |n, i|
|
620
|
-
if yield(n)
|
621
|
-
keep_e << n
|
622
|
-
keep_i << i
|
623
|
-
end
|
624
|
-
end
|
625
|
-
|
626
|
-
@data = cast_vector_to @dtype, keep_e
|
627
|
-
@index = Daru::Index.new(keep_i)
|
628
|
-
set_missing_positions unless Daru.lazy_update
|
629
|
-
set_size
|
630
|
-
|
631
|
-
self
|
647
|
+
delete_if { |val| !yield(val) }
|
632
648
|
end
|
633
649
|
|
634
650
|
# Reports all values that don't comply with a condition.
|
635
651
|
# Returns a hash with the index of data and the invalid data.
|
636
652
|
def verify
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
h
|
653
|
+
(0...size)
|
654
|
+
.map { |i| [i, @data[i]] }
|
655
|
+
.reject { |_i, val| yield(val) }
|
656
|
+
.to_h
|
643
657
|
end
|
644
658
|
|
645
659
|
# Return an Array with the data split by a separator.
|
@@ -674,29 +688,19 @@ module Daru
|
|
674
688
|
#
|
675
689
|
def split_by_separator sep=','
|
676
690
|
split_data = splitted sep
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
out[f].push(nil)
|
685
|
-
end
|
686
|
-
else
|
687
|
-
factors.each do |f|
|
688
|
-
out[f].push(r.include?(f) ? 1 : 0)
|
689
|
-
end
|
690
|
-
end
|
691
|
-
end
|
692
|
-
|
693
|
-
out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
|
691
|
+
split_data
|
692
|
+
.flatten.uniq.compact.map do |key|
|
693
|
+
[
|
694
|
+
key,
|
695
|
+
Daru::Vector.new(split_data.map { |v| split_value(key, v) })
|
696
|
+
]
|
697
|
+
end.to_h
|
694
698
|
end
|
695
699
|
|
696
700
|
def split_by_separator_freq(sep=',')
|
697
|
-
split_by_separator(sep).map
|
698
|
-
[k, v.inject
|
699
|
-
|
701
|
+
split_by_separator(sep).map { |k, v|
|
702
|
+
[k, v.map(&:to_i).inject(:+)]
|
703
|
+
}.to_h
|
700
704
|
end
|
701
705
|
|
702
706
|
def reset_index!
|
@@ -718,23 +722,15 @@ module Daru
|
|
718
722
|
# # 1 false
|
719
723
|
# # 2 false
|
720
724
|
# # 3 true
|
725
|
+
#
|
721
726
|
def is_nil?
|
722
|
-
|
723
|
-
|
724
|
-
nil_truth_vector[idx] = self[idx].nil? ? true : false
|
725
|
-
end
|
726
|
-
|
727
|
-
nil_truth_vector
|
727
|
+
# FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
|
728
|
+
recode(&:nil?)
|
728
729
|
end
|
729
730
|
|
730
731
|
# Opposite of #is_nil?
|
731
732
|
def not_nil?
|
732
|
-
|
733
|
-
@index.each do |idx|
|
734
|
-
nil_truth_vector[idx] = self[idx].nil? ? false : true
|
735
|
-
end
|
736
|
-
|
737
|
-
nil_truth_vector
|
733
|
+
recode { |v| !v.nil? }
|
738
734
|
end
|
739
735
|
|
740
736
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
@@ -744,7 +740,7 @@ module Daru
|
|
744
740
|
#
|
745
741
|
# * +replacement+ - The value which should replace all nils
|
746
742
|
def replace_nils! replacement
|
747
|
-
|
743
|
+
indexes(*Daru::MISSING_VALUES).each do |idx|
|
748
744
|
self[idx] = replacement
|
749
745
|
end
|
750
746
|
|
@@ -765,13 +761,13 @@ module Daru
|
|
765
761
|
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
766
762
|
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
767
763
|
def lag k=1
|
768
|
-
return dup if k
|
764
|
+
return dup if k.zero?
|
769
765
|
|
770
766
|
dat = @data.to_a.dup
|
771
767
|
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
772
768
|
(0...k).each { |i| dat[i] = nil }
|
773
769
|
|
774
|
-
Daru::Vector.new(dat, index: @index, name: @name
|
770
|
+
Daru::Vector.new(dat, index: @index, name: @name)
|
775
771
|
end
|
776
772
|
|
777
773
|
def detach_index
|
@@ -788,7 +784,19 @@ module Daru
|
|
788
784
|
|
789
785
|
# number of non-missing elements
|
790
786
|
def n_valid
|
791
|
-
|
787
|
+
size - indexes(*Daru::MISSING_VALUES).size
|
788
|
+
end
|
789
|
+
deprecate :n_valid, :count_values, 2016, 10
|
790
|
+
|
791
|
+
# Count the number of values specified
|
792
|
+
# @param [Array] *values values to count for
|
793
|
+
# @return [Integer] the number of times the values mentioned occurs
|
794
|
+
# @example
|
795
|
+
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
796
|
+
# dv.count_values nil
|
797
|
+
# # => 2
|
798
|
+
def count_values(*values)
|
799
|
+
positions(*values).size
|
792
800
|
end
|
793
801
|
|
794
802
|
# Returns *true* if an index exists
|
@@ -796,6 +804,11 @@ module Daru
|
|
796
804
|
@index.include? index
|
797
805
|
end
|
798
806
|
|
807
|
+
# @return [Daru::DataFrame] the vector as a single-vector dataframe
|
808
|
+
def to_df
|
809
|
+
Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
|
810
|
+
end
|
811
|
+
|
799
812
|
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
800
813
|
#
|
801
814
|
# == Arguments
|
@@ -811,11 +824,39 @@ module Daru
|
|
811
824
|
end
|
812
825
|
end
|
813
826
|
|
827
|
+
# Convert vector to nmatrix object
# @param [Symbol] axis :horizontal or :vertical
# @return [NMatrix] NMatrix object containing all values of the vector
# @raise [ArgumentError] if the vector is not numeric, contains nil, or
#   axis is neither :horizontal nor :vertical
# @example
#   dv = Daru::Vector.new [1, 2, 3]
#   dv.to_nmatrix
#   # =>
#   # [
#   #   [1, 2, 3] ]
def to_nmatrix axis=:horizontal
  # The guard fires for non-numeric vectors or vectors containing nil; the
  # message previously lacked a separating space and claimed the vector
  # "is numeric".
  unless numeric? && !include?(nil)
    raise ArgumentError, 'Can not convert to nmatrix '\
      'because the vector is not numeric or contains nil'
  end

  case axis
  when :horizontal
    NMatrix.new [1, size], to_a
  when :vertical
    NMatrix.new [size, 1], to_a
  else
    raise ArgumentError, 'Invalid axis specified. '\
      'Valid axis are :horizontal and :vertical'
  end
end
|
850
|
+
|
814
851
|
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
815
852
|
# the stored GSL::Vector object.
|
816
853
|
def to_gsl
|
817
854
|
raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
|
818
|
-
dtype == :gsl
|
855
|
+
if dtype == :gsl
|
856
|
+
@data.data
|
857
|
+
else
|
858
|
+
GSL::Vector.alloc(reject_values(*Daru::MISSING_VALUES).to_a)
|
859
|
+
end
|
819
860
|
end
|
820
861
|
|
821
862
|
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
@@ -835,30 +876,12 @@ module Daru
|
|
835
876
|
|
836
877
|
# Convert to html for iruby
|
837
878
|
def to_html threshold=30
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
'</tr>'
|
845
|
-
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
846
|
-
@index.each_with_index do |index, num|
|
847
|
-
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
848
|
-
|
849
|
-
next if num <= threshold
|
850
|
-
html += '<tr><td>...</td><td>...</td></tr>'
|
851
|
-
|
852
|
-
last_index = @index.to_a.last
|
853
|
-
html += '<tr>' \
|
854
|
-
'<td>' + last_index.to_s + '</td>' \
|
855
|
-
'<td>' + self[last_index].to_s + '</td>' \
|
856
|
-
'</tr>'
|
857
|
-
break
|
858
|
-
end
|
859
|
-
html += '</table>'
|
860
|
-
|
861
|
-
html
|
879
|
+
path = if index.is_a?(MultiIndex)
|
880
|
+
File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
|
881
|
+
else
|
882
|
+
File.expand_path('../iruby/templates/vector.html.erb', __FILE__)
|
883
|
+
end
|
884
|
+
ERB.new(File.read(path).strip).result(binding)
|
862
885
|
end
|
863
886
|
|
864
887
|
def to_s
|
@@ -870,10 +893,11 @@ module Daru
|
|
870
893
|
ReportBuilder.new(no_title: true).add(self).send(method)
|
871
894
|
end
|
872
895
|
|
873
|
-
|
896
|
+
# :nocov:
|
897
|
+
def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
874
898
|
b.section(name: name) do |s|
|
875
899
|
s.text "n :#{size}"
|
876
|
-
s.text "n valid:#{
|
900
|
+
s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
|
877
901
|
if @type == :object
|
878
902
|
s.text "factors: #{factors.to_a.join(',')}"
|
879
903
|
s.text "mode: #{mode}"
|
@@ -881,7 +905,7 @@ module Daru
|
|
881
905
|
s.table(name: 'Distribution') do |t|
|
882
906
|
frequencies.sort_by(&:to_s).each do |k,v|
|
883
907
|
key = @index.include?(k) ? @index[k] : k
|
884
|
-
t.row [key, v, ('%0.2f%%' % (v.quo(
|
908
|
+
t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
|
885
909
|
end
|
886
910
|
end
|
887
911
|
end
|
@@ -898,47 +922,71 @@ module Daru
|
|
898
922
|
end
|
899
923
|
end
|
900
924
|
end
|
925
|
+
# :nocov:
|
901
926
|
|
902
927
|
# Over rides original inspect for pretty printing in irb
|
903
928
|
def inspect spacing=20, threshold=15
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
929
|
+
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
930
|
+
|
931
|
+
"#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
|
932
|
+
Formatters::Table.format(
|
933
|
+
to_a.lazy.map { |v| [v] },
|
934
|
+
headers: @name && [@name],
|
935
|
+
row_headers: row_headers,
|
936
|
+
threshold: threshold,
|
937
|
+
spacing: spacing
|
938
|
+
)
|
939
|
+
end
|
940
|
+
|
941
|
+
# Sets new index for vector. Preserves index->value correspondence.
|
942
|
+
# Sets nil for new index keys absent from original index.
|
943
|
+
# @note Unlike #reorder! which takes positions as input it takes
|
944
|
+
# index as an input to reorder the vector
|
945
|
+
# @param [Daru::Index, Daru::MultiIndex] new_index new index to order with
|
946
|
+
# @return [Daru::Vector] vector reindexed with new index
|
947
|
+
def reindex! new_index
|
948
|
+
values = []
|
949
|
+
each_with_index do |val, i|
|
950
|
+
values[new_index[i]] = val if new_index.include?(i)
|
926
951
|
end
|
927
|
-
|
952
|
+
values.fill(nil, values.size, new_index.size - values.size)
|
953
|
+
|
954
|
+
@data = cast_vector_to @dtype, values
|
955
|
+
@index = new_index
|
928
956
|
|
929
|
-
|
957
|
+
update_position_cache
|
958
|
+
|
959
|
+
self
|
960
|
+
end
|
961
|
+
|
962
|
+
# Reorder the vector with given positions
|
963
|
+
# @note Unlike #reindex! which takes index as input, it takes
|
964
|
+
# positions as an input to reorder the vector
|
965
|
+
# @param [Array] order the order to reorder the vector with
|
966
|
+
# @return reordered vector
|
967
|
+
# @example
|
968
|
+
# dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
|
969
|
+
# dv.reorder! [2, 1, 0]
|
970
|
+
# # => #<Daru::Vector(3)>
|
971
|
+
# # a 1
|
972
|
+
# # b 2
|
973
|
+
# # c 3
|
974
|
+
def reorder! order
|
975
|
+
@index = @index.reorder order
|
976
|
+
@data = order.map { |i| @data[i] }
|
977
|
+
update_position_cache
|
978
|
+
self
|
979
|
+
end
|
980
|
+
|
981
|
+
# Non-destructive version of #reorder!
|
982
|
+
def reorder order
|
983
|
+
dup.reorder! order
|
930
984
|
end
|
931
985
|
|
932
986
|
# Create a new vector with a different index, and preserve the indexing of
|
933
987
|
# current elements.
|
934
988
|
def reindex new_index
|
935
|
-
|
936
|
-
|
937
|
-
new_index.each do |idx|
|
938
|
-
vector[idx] = @index.include?(idx) ? self[idx] : nil
|
939
|
-
end
|
940
|
-
|
941
|
-
vector
|
989
|
+
dup.reindex!(new_index)
|
942
990
|
end
|
943
991
|
|
944
992
|
def index= idx
|
@@ -956,17 +1004,16 @@ module Daru
|
|
956
1004
|
#
|
957
1005
|
# @param new_name [Symbol] The new name.
|
958
1006
|
def rename new_name
|
959
|
-
if new_name.is_a?(Numeric)
|
960
|
-
@name = new_name
|
961
|
-
return
|
962
|
-
end
|
963
|
-
|
964
1007
|
@name = new_name
|
1008
|
+
self
|
965
1009
|
end
|
966
1010
|
|
967
|
-
|
1011
|
+
alias_method :name=, :rename
|
1012
|
+
|
1013
|
+
# Duplicated a vector
|
1014
|
+
# @return [Daru::Vector] duplicated vector
|
968
1015
|
def dup
|
969
|
-
Daru::Vector.new @data.dup, name: @name,
|
1016
|
+
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
970
1017
|
end
|
971
1018
|
|
972
1019
|
# == Bootstrap
|
@@ -1019,8 +1066,8 @@ module Daru
|
|
1019
1066
|
#
|
1020
1067
|
# == Reference:
|
1021
1068
|
# * Sawyer, S. (2005). Resampling Data: Using a Statistical Jacknife.
|
1022
|
-
def jackknife(estimators, k=1)
|
1023
|
-
raise "n should be divisible by k:#{k}" unless size % k
|
1069
|
+
def jackknife(estimators, k=1) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
1070
|
+
raise "n should be divisible by k:#{k}" unless (size % k).zero?
|
1024
1071
|
|
1025
1072
|
nb = (size / k).to_i
|
1026
1073
|
h_est, es, ps = prepare_bootstrap(estimators)
|
@@ -1057,62 +1104,123 @@ module Daru
|
|
1057
1104
|
# vector, setting this to false will return the same vector.
|
1058
1105
|
# Otherwise, a duplicate will be returned irrespective of
|
1059
1106
|
# presence of missing data.
|
1060
|
-
def only_valid as_a=:vector, duplicate=true
|
1061
|
-
return dup if !has_missing_data? && as_a == :vector && duplicate
|
1062
|
-
return self if !has_missing_data? && as_a == :vector && !duplicate
|
1063
|
-
return to_a if !has_missing_data? && as_a != :vector
|
1064
|
-
|
1065
|
-
new_index = @index.to_a - missing_positions
|
1066
|
-
new_vector = new_index.map do |idx|
|
1067
|
-
self[idx]
|
1068
|
-
end
|
1069
1107
|
|
1070
|
-
|
1108
|
+
def only_valid as_a=:vector, _duplicate=true
|
1109
|
+
# FIXME: Now duplicate is just ignored.
|
1110
|
+
# There are no spec that fail on this case, so I'll leave it
|
1111
|
+
# this way for now - zverok, 2016-05-07
|
1071
1112
|
|
1072
|
-
|
1113
|
+
new_index = @index.to_a - indexes(*Daru::MISSING_VALUES)
|
1114
|
+
new_vector = new_index.map { |idx| self[idx] }
|
1115
|
+
|
1116
|
+
if as_a == :vector
|
1117
|
+
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1118
|
+
else
|
1119
|
+
new_vector
|
1120
|
+
end
|
1121
|
+
end
|
1122
|
+
deprecate :only_valid, :reject_values, 2016, 10
|
1123
|
+
|
1124
|
+
# Return a vector with specified values removed
|
1125
|
+
# @param [Array] *values values to reject from resultant vector
|
1126
|
+
# @return [Daru::Vector] vector with specified values removed
|
1127
|
+
# @example
|
1128
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN]
|
1129
|
+
# dv.reject_values nil, Float::NAN
|
1130
|
+
# # => #<Daru::Vector(2)>
|
1131
|
+
# # 0 1
|
1132
|
+
# # 1 2
|
1133
|
+
def reject_values(*values)
|
1134
|
+
resultant_pos = size.times.to_a - positions(*values)
|
1135
|
+
dv = at(*resultant_pos)
|
1136
|
+
# Handle the case when number of positions is 1
|
1137
|
+
# and hence #at doesn't return a vector
|
1138
|
+
if dv.is_a?(Daru::Vector)
|
1139
|
+
dv
|
1140
|
+
else
|
1141
|
+
pos = resultant_pos.first
|
1142
|
+
at(pos..pos)
|
1143
|
+
end
|
1144
|
+
end
|
1145
|
+
|
1146
|
+
# Return indexes of values specified
|
1147
|
+
# @param [Array] *values values to find indexes for
|
1148
|
+
# @return [Array] array of indexes of values specified
|
1149
|
+
# @example
|
1150
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
1151
|
+
# dv.indexes nil, Float::NAN
|
1152
|
+
# # => [13, 14]
|
1153
|
+
def indexes(*values)
|
1154
|
+
index.to_a.values_at(*positions(*values))
|
1155
|
+
end
|
1156
|
+
|
1157
|
+
# Replaces specified values with a new value
|
1158
|
+
# @param [Array] old_values array of values to replace
|
1159
|
+
# @param [object] new_value new value to replace with
|
1160
|
+
# @note It performs the replace in place.
|
1161
|
+
# @return [Daru::Vector] Same vector itself with values
|
1162
|
+
# replaced with new value
|
1163
|
+
# @example
|
1164
|
+
# dv = Daru::Vector.new [1, 2, :a, :b]
|
1165
|
+
# dv.replace_values [:a, :b], nil
|
1166
|
+
# dv
|
1167
|
+
# # =>
|
1168
|
+
# # #<Daru::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
1169
|
+
# # nil
|
1170
|
+
# # 0 1
|
1171
|
+
# # 1 2
|
1172
|
+
# # 2 nil
|
1173
|
+
# # 3 nil
|
1174
|
+
def replace_values(old_values, new_value)
|
1175
|
+
old_values = [old_values] unless old_values.is_a? Array
|
1176
|
+
size.times do |pos|
|
1177
|
+
set_at([pos], new_value) if include_with_nan? old_values, at(pos)
|
1178
|
+
end
|
1179
|
+
self
|
1073
1180
|
end
|
1074
1181
|
|
1075
1182
|
# Returns a Vector containing only missing data (preserves indexes).
|
1076
1183
|
def only_missing as_a=:vector
|
1077
1184
|
if as_a == :vector
|
1078
|
-
self[*
|
1185
|
+
self[*indexes(*Daru::MISSING_VALUES)]
|
1079
1186
|
elsif as_a == :array
|
1080
|
-
self[*
|
1187
|
+
self[*indexes(*Daru::MISSING_VALUES)].to_a
|
1081
1188
|
end
|
1082
1189
|
end
|
1190
|
+
deprecate :only_missing, nil, 2016, 10
|
1083
1191
|
|
1084
1192
|
# Returns a Vector with only numerical data. Missing data is included
|
1085
1193
|
# but non-Numeric objects are excluded. Preserves index.
|
1086
1194
|
def only_numerics
|
1087
|
-
numeric_indexes =
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
end
|
1195
|
+
numeric_indexes =
|
1196
|
+
each_with_index
|
1197
|
+
.select { |v, _i| v.is_a?(Numeric) || v.nil? }
|
1198
|
+
.map(&:last)
|
1092
1199
|
|
1093
1200
|
self[*numeric_indexes]
|
1094
1201
|
end
|
1095
1202
|
|
1203
|
+
DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/
|
1204
|
+
|
1096
1205
|
# Returns the database type for the vector, according to its content
|
1097
1206
|
def db_type
|
1098
1207
|
# first, detect any character not number
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
return 'DOUBLE'
|
1208
|
+
case
|
1209
|
+
when @data.any? { |v| v.to_s =~ DATE_REGEXP }
|
1210
|
+
'DATE'
|
1211
|
+
when @data.any? { |v| v.to_s =~ /[^0-9e.-]/ }
|
1212
|
+
'VARCHAR (255)'
|
1213
|
+
when @data.any? { |v| v.to_s =~ /\./ }
|
1214
|
+
'DOUBLE'
|
1107
1215
|
else
|
1108
|
-
|
1216
|
+
'INTEGER'
|
1109
1217
|
end
|
1110
1218
|
end
|
1111
1219
|
|
1112
1220
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
1113
1221
|
# all values with nils.
|
1114
1222
|
def clone_structure
|
1115
|
-
Daru::Vector.new(([nil]
|
1223
|
+
Daru::Vector.new(([nil]*size), name: @name, index: @index.dup)
|
1116
1224
|
end
|
1117
1225
|
|
1118
1226
|
# Save the vector to a file
|
@@ -1129,38 +1237,156 @@ module Daru
|
|
1129
1237
|
data: @data.to_a,
|
1130
1238
|
dtype: @dtype,
|
1131
1239
|
name: @name,
|
1132
|
-
|
1133
|
-
index: @index,
|
1134
|
-
missing_values: @missing_values
|
1240
|
+
index: @index
|
1135
1241
|
)
|
1136
1242
|
end
|
1137
1243
|
|
1138
|
-
|
1139
|
-
h = Marshal.load(data)
|
1140
|
-
Daru::Vector.new(h[:data],
|
1141
|
-
index: h[:index],
|
1142
|
-
name: h[:name], metadata: h[:metadata],
|
1143
|
-
dtype: h[:dtype], missing_values: h[:missing_values])
|
1144
|
-
end
|
1145
|
-
|
1244
|
+
# :nocov:
|
1146
1245
|
def daru_vector(*)
|
1147
1246
|
self
|
1148
1247
|
end
|
1248
|
+
# :nocov:
|
1149
1249
|
|
1150
1250
|
alias :dv :daru_vector
|
1151
1251
|
|
1252
|
+
# Converts a non category type vector to category type vector.
|
1253
|
+
# @param [Hash] opts options to convert to category
|
1254
|
+
# @option opts [true, false] :ordered Specify if vector is ordered or not.
|
1255
|
+
# If it is ordered, it can be sorted and min, max like functions would work
|
1256
|
+
# @option opts [Array] :categories set categories in the specified order
|
1257
|
+
# @return [Daru::Vector] vector with type category
|
1258
|
+
def to_category opts={}
|
1259
|
+
dv = Daru::Vector.new to_a, type: :category, name: @name, index: @index
|
1260
|
+
dv.ordered = opts[:ordered] || false
|
1261
|
+
dv.categories = opts[:categories] if opts[:categories]
|
1262
|
+
dv
|
1263
|
+
end
|
1264
|
+
|
1152
1265
|
def method_missing(name, *args, &block)
|
1266
|
+
# FIXME: it is shamefully fragile. Should be either made stronger
|
1267
|
+
# (string/symbol dychotomy, informative errors) or removed totally. - zverok
|
1153
1268
|
if name =~ /(.+)\=/
|
1154
|
-
self[
|
1269
|
+
self[$1.to_sym] = args[0]
|
1155
1270
|
elsif has_index?(name)
|
1156
1271
|
self[name]
|
1157
1272
|
else
|
1158
|
-
super
|
1273
|
+
super
|
1274
|
+
end
|
1275
|
+
end
|
1276
|
+
|
1277
|
+
def respond_to_missing?(name, include_private=false)
|
1278
|
+
name.to_s.end_with?('=') || has_index?(name) || super
|
1279
|
+
end
|
1280
|
+
|
1281
|
+
# Partition a numeric variable into categories.
|
1282
|
+
# @param [Array<Numeric>] partitions an array whose consecutive elements
|
1283
|
+
# provide intervals for categories
|
1284
|
+
# @param [Hash] opts options to cut the partition
|
1285
|
+
# @option opts [:left, :right] :close_at specifies whether the interval closes at
|
1286
|
+
# the right side of left side
|
1287
|
+
# @option opts [Array] :labels names of the categories
|
1288
|
+
# @return [Daru::Vector] numeric variable converted to categorical variable
|
1289
|
+
# @example
|
1290
|
+
# heights = Daru::Vector.new [30, 35, 32, 50, 42, 51]
|
1291
|
+
# height_cat = heights.cut [30, 40, 50, 60], labels=['low', 'medium', 'high']
|
1292
|
+
# # => #<Daru::Vector(6)>
|
1293
|
+
# # 0 low
|
1294
|
+
# # 1 low
|
1295
|
+
# # 2 low
|
1296
|
+
# # 3 high
|
1297
|
+
# # 4 medium
|
1298
|
+
# # 5 high
|
1299
|
+
def cut partitions, opts={}
|
1300
|
+
close_at, labels = opts[:close_at] || :right, opts[:labels]
|
1301
|
+
partitions = partitions.to_a
|
1302
|
+
values = to_a.map { |val| cut_find_category partitions, val, close_at }
|
1303
|
+
cats = cut_categories(partitions, close_at)
|
1304
|
+
|
1305
|
+
dv = Daru::Vector.new values,
|
1306
|
+
index: @index,
|
1307
|
+
type: :category,
|
1308
|
+
categories: cats
|
1309
|
+
|
1310
|
+
# Rename categories if new labels provided
|
1311
|
+
if labels
|
1312
|
+
dv.rename_categories Hash[cats.zip(labels)]
|
1313
|
+
else
|
1314
|
+
dv
|
1315
|
+
end
|
1316
|
+
end
|
1317
|
+
|
1318
|
+
def positions(*values)
|
1319
|
+
case values
|
1320
|
+
when [nil]
|
1321
|
+
nil_positions
|
1322
|
+
when [Float::NAN]
|
1323
|
+
nan_positions
|
1324
|
+
when [nil, Float::NAN], [Float::NAN, nil]
|
1325
|
+
nil_positions + nan_positions
|
1326
|
+
else
|
1327
|
+
size.times.select { |i| include_with_nan? values, @data[i] }
|
1159
1328
|
end
|
1160
1329
|
end
|
1161
1330
|
|
1162
1331
|
private
|
1163
1332
|
|
1333
|
+
def nil_positions
|
1334
|
+
@nil_positions ||
|
1335
|
+
@nil_positions = size.times.select { |i| @data[i].nil? }
|
1336
|
+
end
|
1337
|
+
|
1338
|
+
def nan_positions
|
1339
|
+
@nan_positions ||
|
1340
|
+
@nan_positions = size.times.select do |i|
|
1341
|
+
@data[i].respond_to?(:nan?) && @data[i].nan?
|
1342
|
+
end
|
1343
|
+
end
|
1344
|
+
|
1345
|
+
def initialize_vector source, opts
|
1346
|
+
index, source = parse_source(source, opts)
|
1347
|
+
set_name opts[:name]
|
1348
|
+
|
1349
|
+
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
1350
|
+
@index = Index.coerce(index || @data.size)
|
1351
|
+
|
1352
|
+
guard_sizes!
|
1353
|
+
|
1354
|
+
@possibly_changed_type = true
|
1355
|
+
# Include plotting functionality
|
1356
|
+
self.plotting_library = Daru.plotting_library
|
1357
|
+
end
|
1358
|
+
|
1359
|
+
def parse_source source, opts
|
1360
|
+
if source.is_a?(Hash)
|
1361
|
+
[source.keys, source.values]
|
1362
|
+
else
|
1363
|
+
[opts[:index], source || []]
|
1364
|
+
end
|
1365
|
+
end
|
1366
|
+
|
1367
|
+
def guard_sizes!
|
1368
|
+
if @index.size > @data.size
|
1369
|
+
cast(dtype: :array) # NM with nils seg faults
|
1370
|
+
@data.fill(nil, @data.size...@index.size)
|
1371
|
+
elsif @index.size < @data.size
|
1372
|
+
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
1373
|
+
end
|
1374
|
+
end
|
1375
|
+
|
1376
|
+
def guard_type_check value
|
1377
|
+
@possibly_changed_type = true \
|
1378
|
+
if object? && (value.nil? || value.is_a?(Numeric)) ||
|
1379
|
+
numeric? && !value.is_a?(Numeric) && !value.nil?
|
1380
|
+
end
|
1381
|
+
|
1382
|
+
def split_value key, v
|
1383
|
+
case
|
1384
|
+
when v.nil? then nil
|
1385
|
+
when v.include?(key) then 1
|
1386
|
+
else 0
|
1387
|
+
end
|
1388
|
+
end
|
1389
|
+
|
1164
1390
|
# For an array or hash of estimators methods, returns
|
1165
1391
|
# an array with three elements
|
1166
1392
|
# 1.- A hash with estimators names as keys and lambdas as values
|
@@ -1180,18 +1406,6 @@ module Daru
|
|
1180
1406
|
[h_est, h_est.keys, bss]
|
1181
1407
|
end
|
1182
1408
|
|
1183
|
-
def keep? a, b, order
|
1184
|
-
eval = yield(a, b)
|
1185
|
-
if order == :ascending
|
1186
|
-
return true if eval == -1
|
1187
|
-
return false if eval == 1
|
1188
|
-
elsif order == :descending
|
1189
|
-
return false if eval == -1
|
1190
|
-
return true if eval == 1
|
1191
|
-
end
|
1192
|
-
false
|
1193
|
-
end
|
1194
|
-
|
1195
1409
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
1196
1410
|
# @dtype variable is set and the underlying data type of vector changed.
|
1197
1411
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
@@ -1203,25 +1417,13 @@ module Daru
|
|
1203
1417
|
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1204
1418
|
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
1205
1419
|
when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
|
1206
|
-
else raise "Unknown dtype #{dtype}"
|
1420
|
+
else raise ArgumentError, "Unknown dtype #{dtype}"
|
1207
1421
|
end
|
1208
1422
|
|
1209
1423
|
@dtype = dtype || :array
|
1210
1424
|
new_vector
|
1211
1425
|
end
|
1212
1426
|
|
1213
|
-
def index_for index
|
1214
|
-
if @index.include?(index)
|
1215
|
-
@index[index]
|
1216
|
-
elsif index.is_a?(Numeric)
|
1217
|
-
index
|
1218
|
-
end
|
1219
|
-
end
|
1220
|
-
|
1221
|
-
def set_size
|
1222
|
-
@size = @data.size
|
1223
|
-
end
|
1224
|
-
|
1225
1427
|
def set_name name # rubocop:disable Style/AccessorMethodName
|
1226
1428
|
@name =
|
1227
1429
|
if name.is_a?(Numeric) then name
|
@@ -1232,38 +1434,109 @@ module Daru
|
|
1232
1434
|
end
|
1233
1435
|
end
|
1234
1436
|
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1437
|
+
# Raises IndexError when one of the positions is an invalid position
|
1438
|
+
def validate_positions *positions
|
1439
|
+
positions = [positions] if positions.is_a? Integer
|
1440
|
+
positions.each do |pos|
|
1441
|
+
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
1239
1442
|
end
|
1240
1443
|
end
|
1241
1444
|
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1445
|
+
# coerce ranges, integers and array in appropriate ways
|
1446
|
+
def coerce_positions *positions
|
1447
|
+
if positions.size == 1
|
1448
|
+
case positions.first
|
1449
|
+
when Integer
|
1450
|
+
positions.first
|
1451
|
+
when Range
|
1452
|
+
size.times.to_a[positions.first]
|
1453
|
+
else
|
1454
|
+
raise ArgumentError, 'Unkown position type.'
|
1455
|
+
end
|
1245
1456
|
else
|
1246
|
-
|
1457
|
+
positions
|
1247
1458
|
end
|
1248
1459
|
end
|
1249
1460
|
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1461
|
+
# Helper method for []=.
|
1462
|
+
# Assigs existing index to another value
|
1463
|
+
def modify_vector(indexes, val)
|
1464
|
+
positions = @index.pos(*indexes)
|
1465
|
+
|
1466
|
+
if positions.is_a? Numeric
|
1467
|
+
@data[positions] = val
|
1468
|
+
else
|
1469
|
+
positions.each { |pos| @data[pos] = val }
|
1470
|
+
end
|
1253
1471
|
end
|
1254
1472
|
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
def
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1473
|
+
# Helper method for []=.
|
1474
|
+
# Add a new index and assign it value
|
1475
|
+
def insert_vector(indexes, val)
|
1476
|
+
new_index = @index.add(*indexes)
|
1477
|
+
# May be create +=
|
1478
|
+
(new_index.size - @index.size).times { @data << val }
|
1479
|
+
@index = new_index
|
1480
|
+
end
|
1481
|
+
|
1482
|
+
# Works similar to #[]= but also insert the vector in case index is not valid
|
1483
|
+
# It is there only to be accessed by Daru::DataFrame and not meant for user.
|
1484
|
+
def set indexes, val
|
1485
|
+
cast(dtype: :array) if val.nil? && dtype != :array
|
1486
|
+
guard_type_check(val)
|
1487
|
+
|
1488
|
+
if @index.valid?(*indexes)
|
1489
|
+
modify_vector(indexes, val)
|
1490
|
+
else
|
1491
|
+
insert_vector(indexes, val)
|
1492
|
+
end
|
1493
|
+
|
1494
|
+
update_position_cache
|
1495
|
+
end
|
1496
|
+
|
1497
|
+
def cut_find_category partitions, val, close_at
|
1498
|
+
case close_at
|
1499
|
+
when :right
|
1500
|
+
right_index = partitions.index { |i| i > val }
|
1501
|
+
raise ArgumentError, 'Invalid partition' if right_index.nil?
|
1502
|
+
left_index = right_index - 1
|
1503
|
+
"#{partitions[left_index]}-#{partitions[right_index]-1}"
|
1504
|
+
when :left
|
1505
|
+
right_index = partitions.index { |i| i >= val }
|
1506
|
+
raise ArgumentError, 'Invalid partition' if right_index.nil?
|
1507
|
+
left_index = right_index - 1
|
1508
|
+
"#{partitions[left_index]+1}-#{partitions[right_index]}"
|
1509
|
+
else
|
1510
|
+
raise ArgumentError, "Invalid parameter #{close_at} to close_at."
|
1511
|
+
end
|
1512
|
+
end
|
1513
|
+
|
1514
|
+
def cut_categories partitions, close_at
|
1515
|
+
case close_at
|
1516
|
+
when :right
|
1517
|
+
Array.new(partitions.size-1) do |left_index|
|
1518
|
+
"#{partitions[left_index]}-#{partitions[left_index+1]-1}"
|
1519
|
+
end
|
1520
|
+
when :left
|
1521
|
+
Array.new(partitions.size-1) do |left_index|
|
1522
|
+
"#{partitions[left_index]+1}-#{partitions[left_index+1]}"
|
1265
1523
|
end
|
1266
1524
|
end
|
1267
1525
|
end
|
1526
|
+
|
1527
|
+
def include_with_nan? array, value
|
1528
|
+
# Returns true if value is included in array.
|
1529
|
+
# Similar to include? but also works if value is Float::NAN
|
1530
|
+
if value.respond_to?(:nan?) && value.nan?
|
1531
|
+
array.any? { |i| i.respond_to?(:nan?) && i.nan? }
|
1532
|
+
else
|
1533
|
+
array.include? value
|
1534
|
+
end
|
1535
|
+
end
|
1536
|
+
|
1537
|
+
def update_position_cache
|
1538
|
+
@nil_positions = nil
|
1539
|
+
@nan_positions = nil
|
1540
|
+
end
|
1268
1541
|
end
|
1269
1542
|
end
|