daru 0.1.3.1 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rspec +2 -1
- data/.rspec_formatter.rb +33 -0
- data/.rubocop.yml +26 -2
- data/History.md +38 -0
- data/README.md +22 -13
- data/Rakefile +50 -2
- data/benchmarks/csv_reading.rb +22 -0
- data/daru.gemspec +9 -2
- data/lib/daru.rb +36 -4
- data/lib/daru/accessors/array_wrapper.rb +6 -1
- data/lib/daru/accessors/dataframe_by_row.rb +10 -2
- data/lib/daru/accessors/gsl_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +9 -0
- data/lib/daru/category.rb +935 -0
- data/lib/daru/core/group_by.rb +29 -38
- data/lib/daru/core/merge.rb +186 -145
- data/lib/daru/core/query.rb +22 -11
- data/lib/daru/dataframe.rb +976 -885
- data/lib/daru/date_time/index.rb +166 -166
- data/lib/daru/date_time/offsets.rb +66 -77
- data/lib/daru/formatters/table.rb +54 -0
- data/lib/daru/helpers/array.rb +40 -0
- data/lib/daru/index.rb +476 -73
- data/lib/daru/io/io.rb +66 -45
- data/lib/daru/io/sql_data_source.rb +33 -62
- data/lib/daru/iruby/helpers.rb +38 -0
- data/lib/daru/iruby/templates/dataframe.html.erb +52 -0
- data/lib/daru/iruby/templates/dataframe_mi.html.erb +58 -0
- data/lib/daru/iruby/templates/multi_index.html.erb +12 -0
- data/lib/daru/iruby/templates/vector.html.erb +27 -0
- data/lib/daru/iruby/templates/vector_mi.html.erb +36 -0
- data/lib/daru/maths/arithmetic/dataframe.rb +16 -18
- data/lib/daru/maths/arithmetic/vector.rb +4 -6
- data/lib/daru/maths/statistics/dataframe.rb +8 -15
- data/lib/daru/maths/statistics/vector.rb +120 -98
- data/lib/daru/monkeys.rb +12 -40
- data/lib/daru/plotting/gruff.rb +3 -0
- data/lib/daru/plotting/gruff/category.rb +49 -0
- data/lib/daru/plotting/gruff/dataframe.rb +91 -0
- data/lib/daru/plotting/gruff/vector.rb +57 -0
- data/lib/daru/plotting/nyaplot.rb +3 -0
- data/lib/daru/plotting/nyaplot/category.rb +34 -0
- data/lib/daru/plotting/nyaplot/dataframe.rb +187 -0
- data/lib/daru/plotting/nyaplot/vector.rb +46 -0
- data/lib/daru/vector.rb +694 -421
- data/lib/daru/version.rb +1 -1
- data/profile/_base.rb +23 -0
- data/profile/df_to_a.rb +10 -0
- data/profile/filter.rb +13 -0
- data/profile/joining.rb +13 -0
- data/profile/sorting.rb +12 -0
- data/profile/vector_each_with_index.rb +9 -0
- data/spec/accessors/wrappers_spec.rb +2 -4
- data/spec/categorical_spec.rb +1734 -0
- data/spec/core/group_by_spec.rb +52 -2
- data/spec/core/merge_spec.rb +63 -2
- data/spec/core/query_spec.rb +236 -80
- data/spec/dataframe_spec.rb +1373 -79
- data/spec/date_time/data_spec.rb +3 -5
- data/spec/date_time/index_spec.rb +154 -17
- data/spec/date_time/offsets_spec.rb +3 -4
- data/spec/fixtures/empties.dat +2 -0
- data/spec/fixtures/strings.dat +2 -0
- data/spec/formatters/table_formatter_spec.rb +99 -0
- data/spec/helpers_spec.rb +8 -0
- data/spec/index/categorical_index_spec.rb +168 -0
- data/spec/index/index_spec.rb +283 -0
- data/spec/index/multi_index_spec.rb +570 -0
- data/spec/io/io_spec.rb +31 -4
- data/spec/io/sql_data_source_spec.rb +0 -1
- data/spec/iruby/dataframe_spec.rb +172 -0
- data/spec/iruby/helpers_spec.rb +49 -0
- data/spec/iruby/multi_index_spec.rb +37 -0
- data/spec/iruby/vector_spec.rb +107 -0
- data/spec/math/arithmetic/dataframe_spec.rb +71 -13
- data/spec/math/arithmetic/vector_spec.rb +8 -10
- data/spec/math/statistics/dataframe_spec.rb +3 -5
- data/spec/math/statistics/vector_spec.rb +45 -55
- data/spec/monkeys_spec.rb +32 -9
- data/spec/plotting/dataframe_spec.rb +386 -0
- data/spec/plotting/vector_spec.rb +230 -0
- data/spec/shared/vector_display_spec.rb +215 -0
- data/spec/spec_helper.rb +23 -0
- data/spec/vector_spec.rb +905 -138
- metadata +143 -11
- data/.rubocop_todo.yml +0 -44
- data/lib/daru/plotting/dataframe.rb +0 -104
- data/lib/daru/plotting/vector.rb +0 -38
- data/spec/daru_spec.rb +0 -58
- data/spec/index_spec.rb +0 -375
@@ -0,0 +1,46 @@
|
|
1
|
+
module Daru
  module Plotting
    module Vector
      module NyaplotLibrary
        # Draws this Vector with Nyaplot and returns the result of
        # Nyaplot::Plot#show.
        #
        # When a block is given it receives the plot object (Nyaplot::Plot)
        # and the diagram returned by Nyaplot::Plot#add, so that further
        # settings can be applied through the Nyaplot API before showing.
        #
        # == Options
        #   :type - diagram type handed to Nyaplot::Plot#add (default :scatter).
        #           :box and :histogram are fed the values only; every other
        #           type is fed an x axis plus the values. For :scatter the
        #           x axis is the positions 0...size, otherwise it is the
        #           vector's index.
        #   This method reads no other key from opts; title, labels, width
        #   and the like are set on the yielded plot object.
        #
        # == Usage
        #   vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
        #   vector.plot(type: :bar) do |plot|
        #     plot.title "My first plot"
        #     plot.width 1200
        #   end
        def plot opts={}
          settings = {type: :scatter}.merge(opts)
          kind = settings[:type]

          x_axis =
            if kind == :scatter
              (0...size).to_a
            else
              @index.to_a
            end

          plot = Nyaplot::Plot.new
          diagram = create_diagram plot, kind, x_axis

          yield plot, diagram if block_given?

          plot.show
        end

        private

        # Adds the diagram to the plot. Box plots and histograms take the
        # values alone; all other types take the x axis as well.
        def create_diagram plot, type, x_axis
          values = @data.to_a
          return plot.add(type, values) if [:box, :histogram].include?(type)

          plot.add(type, x_axis, values)
        end
      end
    end
  end
end
|
data/lib/daru/vector.rb
CHANGED
@@ -1,16 +1,98 @@
|
|
1
1
|
require 'daru/maths/arithmetic/vector.rb'
|
2
2
|
require 'daru/maths/statistics/vector.rb'
|
3
|
-
require 'daru/plotting/
|
3
|
+
require 'daru/plotting/gruff.rb'
|
4
|
+
require 'daru/plotting/nyaplot.rb'
|
4
5
|
require 'daru/accessors/array_wrapper.rb'
|
5
6
|
require 'daru/accessors/nmatrix_wrapper.rb'
|
6
7
|
require 'daru/accessors/gsl_wrapper.rb'
|
8
|
+
require 'daru/category.rb'
|
7
9
|
|
8
10
|
module Daru
|
9
|
-
class Vector
|
11
|
+
class Vector # rubocop:disable Metrics/ClassLength
|
10
12
|
include Enumerable
|
11
13
|
include Daru::Maths::Arithmetic::Vector
|
12
14
|
include Daru::Maths::Statistics::Vector
|
13
|
-
|
15
|
+
extend Gem::Deprecate
|
16
|
+
|
17
|
+
class << self
|
18
|
+
# Create a new vector by specifying the size and an optional value
# and block to generate values.
#
# == Description
#
# The *new_with_size* class method lets you create a Daru::Vector
# by specifying the size as the argument. The optional block, if
# supplied, is run once for populating each element in the Vector.
#
# The result of each run of the block is the value that is ultimately
# assigned to that position in the Vector. Without a block, every
# position is filled with the :value option (nil when it is absent).
#
# == Options
#   :value - fill value used when no block is given
#   All the rest like .new
#
# The hash passed as +opts+ is left untouched; :value is stripped from a
# copy before the remaining options are forwarded to Daru::Vector.new.
def new_with_size n, opts={}, &block
  # Work on a copy so the caller's hash does not lose its :value key.
  forwarded = opts.dup
  value = forwarded.delete :value
  block ||= ->(_) { value }
  Daru::Vector.new Array.new(n, &block), forwarded
end
|
38
|
+
|
39
|
+
# Create a vector using (almost) any object
# * Array: flattened
# * Range: transformed using to_a
# * Daru::Vector
# * Numeric and string values
# * nil: kept as a nil element
#
# == Description
#
# The `Vector.[]` class method creates a vector from almost any
# object that has a `#to_a` method defined on it. It is similar
# to R's `c` method.
#
# == Usage
#
#   a = Daru::Vector[1,2,3,4,6..10]
#   #=>
#   # <Daru::Vector:99448510 @name = nil @size = 9 >
#   #   nil
#   # 0   1
#   # 1   2
#   # 2   3
#   # 3   4
#   # 4   6
#   # 5   7
#   # 6   8
#   # 7   9
#   # 8  10
def [](*indexes)
  values = indexes.map do |a|
    # nil responds to #to_a but converts to [], which flatten would then
    # erase; pass it through untouched so missing values are preserved.
    (a.nil? || !a.respond_to?(:to_a)) ? a : a.to_a
  end.flatten
  Daru::Vector.new(values)
end
|
72
|
+
|
73
|
+
def _load(data) # :nodoc:
|
74
|
+
h = Marshal.load(data)
|
75
|
+
Daru::Vector.new(h[:data],
|
76
|
+
index: h[:index],
|
77
|
+
name: h[:name],
|
78
|
+
dtype: h[:dtype], missing_values: h[:missing_values])
|
79
|
+
end
|
80
|
+
|
81
|
+
def coerce(data, options={})
|
82
|
+
case data
|
83
|
+
when Daru::Vector
|
84
|
+
data
|
85
|
+
when Array, Hash
|
86
|
+
new(data, options)
|
87
|
+
else
|
88
|
+
raise ArgumentError, "Can't coerce #{data.class} to #{self}"
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
def size
|
94
|
+
@data.size
|
95
|
+
end
|
14
96
|
|
15
97
|
def each(&block)
|
16
98
|
return to_enum(:each) unless block_given?
|
@@ -26,17 +108,17 @@ module Daru
|
|
26
108
|
self
|
27
109
|
end
|
28
110
|
|
29
|
-
def each_with_index
|
111
|
+
def each_with_index &block
|
30
112
|
return to_enum(:each_with_index) unless block_given?
|
31
113
|
|
32
|
-
@index.each
|
114
|
+
@data.to_a.zip(@index.to_a).each(&block)
|
115
|
+
|
33
116
|
self
|
34
117
|
end
|
35
118
|
|
36
119
|
def map!(&block)
|
37
120
|
return to_enum(:map!) unless block_given?
|
38
121
|
@data.map!(&block)
|
39
|
-
update
|
40
122
|
self
|
41
123
|
end
|
42
124
|
|
@@ -44,8 +126,6 @@ module Daru
|
|
44
126
|
attr_reader :name
|
45
127
|
# The row index. Can be either Daru::Index or Daru::MultiIndex.
|
46
128
|
attr_reader :index
|
47
|
-
# The total number of elements of the vector.
|
48
|
-
attr_reader :size
|
49
129
|
# The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
|
50
130
|
attr_reader :dtype
|
51
131
|
# If the dtype is :nmatrix, this attribute represents the data type of the
|
@@ -54,13 +134,16 @@ module Daru
|
|
54
134
|
attr_reader :nm_dtype
|
55
135
|
# An Array of the positions in the vector that are being treated as 'missing'.
|
56
136
|
attr_reader :missing_positions
|
137
|
+
deprecate :missing_positions, :indexes, 2016, 10
|
57
138
|
# Store a hash of labels for values. Supplementary only. Recommend using index
|
58
139
|
# for proper usage.
|
59
140
|
attr_accessor :labels
|
60
141
|
# Store vector data in an array
|
61
142
|
attr_reader :data
|
62
|
-
#
|
63
|
-
|
143
|
+
# Plotting library being used for this vector
|
144
|
+
attr_reader :plotting_library
|
145
|
+
# TODO: Make private.
|
146
|
+
attr_reader :nil_positions, :nan_positions
|
64
147
|
|
65
148
|
# Create a Vector object.
|
66
149
|
#
|
@@ -93,102 +176,27 @@ module Daru
|
|
93
176
|
# vecarr = Daru::Vector.new [1,2,3,4], index: [:a, :e, :i, :o]
|
94
177
|
# vechsh = Daru::Vector.new({a: 1, e: 2, i: 3, o: 4})
|
95
178
|
def initialize source, opts={}
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
source
|
179
|
+
if opts[:type] == :category
|
180
|
+
# Initialize category type vector
|
181
|
+
extend Daru::Category
|
182
|
+
initialize_category source, opts
|
100
183
|
else
|
101
|
-
|
102
|
-
source
|
184
|
+
# Initialize non-category type vector
|
185
|
+
initialize_vector source, opts
|
103
186
|
end
|
104
|
-
name = opts[:name]
|
105
|
-
set_name name
|
106
|
-
|
107
|
-
@metadata = opts[:metadata] || {}
|
108
|
-
|
109
|
-
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
110
|
-
@index = try_create_index(index || @data.size)
|
111
|
-
|
112
|
-
if @index.size > @data.size
|
113
|
-
cast(dtype: :array) # NM with nils seg faults
|
114
|
-
(@index.size - @data.size).times { @data << nil }
|
115
|
-
elsif @index.size < @data.size
|
116
|
-
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
117
|
-
end
|
118
|
-
|
119
|
-
@possibly_changed_type = true
|
120
|
-
set_missing_values opts[:missing_values]
|
121
|
-
set_missing_positions
|
122
|
-
set_size
|
123
187
|
end
|
124
188
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
# supplied, is run once for populating each element in the Vector.
|
133
|
-
#
|
134
|
-
# The result of each run of the block is the value that is ultimately
|
135
|
-
# assigned to that position in the Vector.
|
136
|
-
#
|
137
|
-
# == Options
|
138
|
-
# :value
|
139
|
-
# All the rest like .new
|
140
|
-
def self.new_with_size n, opts={}, &block
|
141
|
-
value = opts[:value]
|
142
|
-
opts.delete :value
|
143
|
-
if block
|
144
|
-
Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
|
189
|
+
# Select the plotting backend for this vector.
#
# The choice is stored in @plotting_library. When Daru reports the backend
# as available (Daru.has_gruff? / Daru.has_nyaplot?), the vector is also
# extended with Daru::Plotting::Vector::GruffLibrary or
# Daru::Plotting::Vector::NyaplotLibrary respectively.
#
# @param lib [Symbol] :gruff or :nyaplot
# @raise [ArgumentError] if lib is anything else
def plotting_library= lib
  case lib
  when :gruff, :nyaplot
    @plotting_library = lib
    if Daru.send("has_#{lib}?".to_sym)
      extend Module.const_get(
        "Daru::Plotting::Vector::#{lib.to_s.capitalize}Library"
      )
    end
  else
    # The constant was misspelled as ArguementError, so this branch used to
    # fail with NameError instead of reporting the unsupported library.
    raise ArgumentError, "Plotting library #{lib} not supported. "\
      'Supported libraries are :nyaplot and :gruff'
  end
end
|
193
201
|
|
194
202
|
# Get one or more elements with specified index or a range.
|
@@ -203,19 +211,63 @@ module Daru
|
|
203
211
|
# # For vectors employing hierarchical multi index
|
204
212
|
#
|
205
213
|
def [](*input_indexes)
|
206
|
-
# Get
|
207
|
-
|
214
|
+
# Get array of positions indexes
|
215
|
+
positions = @index.pos(*input_indexes)
|
208
216
|
|
209
217
|
# If one object is asked return it
|
210
|
-
return @data[
|
218
|
+
return @data[positions] if positions.is_a? Numeric
|
211
219
|
|
212
|
-
# Form a new Vector using indexes
|
220
|
+
# Form a new Vector using positional indexes
|
213
221
|
Daru::Vector.new(
|
214
|
-
|
215
|
-
name: @name,
|
222
|
+
positions.map { |loc| @data[loc] },
|
223
|
+
name: @name,
|
224
|
+
index: @index.subset(*input_indexes), dtype: @dtype
|
216
225
|
)
|
217
226
|
end
|
218
227
|
|
228
|
+
# Returns vector of values given positional values
|
229
|
+
# @param [Array<object>] *positions positional values
|
230
|
+
# @return [object] vector
|
231
|
+
# @example
|
232
|
+
# dv = Daru::Vector.new 'a'..'e'
|
233
|
+
# dv.at 0, 1, 2
|
234
|
+
# # => #<Daru::Vector(3)>
|
235
|
+
# # 0 a
|
236
|
+
# # 1 b
|
237
|
+
# # 2 c
|
238
|
+
def at *positions
|
239
|
+
# to be used to form index
|
240
|
+
original_positions = positions
|
241
|
+
positions = coerce_positions(*positions)
|
242
|
+
validate_positions(*positions)
|
243
|
+
|
244
|
+
if positions.is_a? Integer
|
245
|
+
@data[positions]
|
246
|
+
else
|
247
|
+
values = positions.map { |pos| @data[pos] }
|
248
|
+
Daru::Vector.new values, index: @index.at(*original_positions), dtype: dtype
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
# Change value at given positions
|
253
|
+
# @param [Array<object>] *positions positional values
|
254
|
+
# @param [object] val value to assign
|
255
|
+
# @example
|
256
|
+
# dv = Daru::Vector.new 'a'..'e'
|
257
|
+
# dv.set_at [0, 1], 'x'
|
258
|
+
# dv
|
259
|
+
# # => #<Daru::Vector(5)>
|
260
|
+
# # 0 x
|
261
|
+
# # 1 x
|
262
|
+
# # 2 c
|
263
|
+
# # 3 d
|
264
|
+
# # 4 e
|
265
|
+
def set_at positions, val
|
266
|
+
validate_positions(*positions)
|
267
|
+
positions.map { |pos| @data[pos] = val }
|
268
|
+
update_position_cache
|
269
|
+
end
|
270
|
+
|
219
271
|
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
220
272
|
# and assign an element at that place in the Daru::Vector.
|
221
273
|
#
|
@@ -229,57 +281,14 @@ module Daru
|
|
229
281
|
# # a 999
|
230
282
|
# # b 2
|
231
283
|
# # c 3
|
232
|
-
def []=(*
|
233
|
-
cast(dtype: :array) if
|
234
|
-
|
235
|
-
@possibly_changed_type = true if @type == :object && (value.nil? ||
|
236
|
-
value.is_a?(Numeric))
|
237
|
-
@possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
|
238
|
-
!value.nil?)
|
239
|
-
|
240
|
-
pos = @index[*location]
|
241
|
-
|
242
|
-
if pos.is_a?(Numeric)
|
243
|
-
@data[pos] = value
|
244
|
-
else
|
245
|
-
begin
|
246
|
-
pos.each { |tuple| self[tuple] = value }
|
247
|
-
rescue NoMethodError
|
248
|
-
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
249
|
-
end
|
250
|
-
end
|
251
|
-
|
252
|
-
set_size
|
253
|
-
set_missing_positions unless Daru.lazy_update
|
254
|
-
end
|
284
|
+
def []=(*indexes, val)
|
285
|
+
cast(dtype: :array) if val.nil? && dtype != :array
|
255
286
|
|
256
|
-
|
257
|
-
# type. To set missing values see the missing_values= method.
|
258
|
-
def missing_values
|
259
|
-
@missing_values.keys
|
260
|
-
end
|
287
|
+
guard_type_check(val)
|
261
288
|
|
262
|
-
|
263
|
-
#
|
264
|
-
# == Usage
|
265
|
-
#
|
266
|
-
# v = Daru::Vector.new [1,2,3,4,5]
|
267
|
-
# v.missing_values = [3]
|
268
|
-
# v.update
|
269
|
-
# v.missing_positions
|
270
|
-
# #=> [2]
|
271
|
-
def missing_values= values
|
272
|
-
set_missing_values values
|
273
|
-
set_missing_positions unless Daru.lazy_update
|
274
|
-
end
|
289
|
+
modify_vector(indexes, val)
|
275
290
|
|
276
|
-
|
277
|
-
# after assingment/deletion etc. are complete. This is provided so that
|
278
|
-
# time is not wasted in creating the metadata for the vector each time
|
279
|
-
# assignment/deletion of elements is done. Updating data this way is called
|
280
|
-
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
281
|
-
def update
|
282
|
-
Daru.lazy_update and set_missing_positions
|
291
|
+
update_position_cache
|
283
292
|
end
|
284
293
|
|
285
294
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -287,7 +296,7 @@ module Daru
|
|
287
296
|
def == other
|
288
297
|
case other
|
289
298
|
when Daru::Vector
|
290
|
-
@index == other.index &&
|
299
|
+
@index == other.index && size == other.size &&
|
291
300
|
@index.all? { |index| self[index] == other[index] }
|
292
301
|
else
|
293
302
|
super
|
@@ -405,8 +414,8 @@ module Daru
|
|
405
414
|
# # 11 5
|
406
415
|
# # 13 5
|
407
416
|
# # 15 1
|
408
|
-
def where
|
409
|
-
Daru::Core::Query.vector_where
|
417
|
+
def where bool_array
|
418
|
+
Daru::Core::Query.vector_where self, bool_array
|
410
419
|
end
|
411
420
|
|
412
421
|
def head q=10
|
@@ -414,18 +423,41 @@ module Daru
|
|
414
423
|
end
|
415
424
|
|
416
425
|
def tail q=10
|
417
|
-
|
426
|
+
start = [size - q, 0].max
|
427
|
+
self[start..(size-1)]
|
418
428
|
end
|
419
429
|
|
420
430
|
def empty?
|
421
431
|
@index.empty?
|
422
432
|
end
|
423
433
|
|
434
|
+
def numeric?
|
435
|
+
type == :numeric
|
436
|
+
end
|
437
|
+
|
438
|
+
def object?
|
439
|
+
type == :object
|
440
|
+
end
|
441
|
+
|
424
442
|
# Reports whether missing data is present in the Vector.
|
425
443
|
def has_missing_data?
|
426
|
-
!
|
444
|
+
!indexes(*Daru::MISSING_VALUES).empty?
|
427
445
|
end
|
428
446
|
alias :flawed? :has_missing_data?
|
447
|
+
deprecate :has_missing_data?, :include_values?, 2016, 10
|
448
|
+
deprecate :flawed?, :include_values?, 2016, 10
|
449
|
+
|
450
|
+
# Check if any one of mentioned values occur in the vector
|
451
|
+
# @param [Array] *values values to check for
|
452
|
+
# @return [true, false] returns true if any one of specified values
|
453
|
+
# occur in the vector
|
454
|
+
# @example
|
455
|
+
# dv = Daru::Vector.new [1, 2, 3, 4, nil]
|
456
|
+
# dv.include_values? nil, Float::NAN
|
457
|
+
# # => true
|
458
|
+
def include_values?(*values)
|
459
|
+
values.any? { |v| include_with_nan? @data, v }
|
460
|
+
end
|
429
461
|
|
430
462
|
# Append an element to the vector by specifying the element and index
|
431
463
|
def concat element, index
|
@@ -434,8 +466,7 @@ module Daru
|
|
434
466
|
@index |= [index]
|
435
467
|
@data[@index[index]] = element
|
436
468
|
|
437
|
-
|
438
|
-
set_missing_positions unless Daru.lazy_update
|
469
|
+
update_position_cache
|
439
470
|
end
|
440
471
|
alias :push :concat
|
441
472
|
alias :<< :concat
|
@@ -463,8 +494,7 @@ module Daru
|
|
463
494
|
@data.delete_at @index[index]
|
464
495
|
@index = Daru::Index.new(@index.to_a - [index])
|
465
496
|
|
466
|
-
|
467
|
-
set_missing_positions unless Daru.lazy_update
|
497
|
+
update_position_cache
|
468
498
|
end
|
469
499
|
|
470
500
|
# The type of data contained in the vector. Can be :object or :numeric. If
|
@@ -489,6 +519,16 @@ module Daru
|
|
489
519
|
@type
|
490
520
|
end
|
491
521
|
|
522
|
+
# Tells if vector is categorical or not.
|
523
|
+
# @return [true, false] true if vector is of type category, false otherwise
|
524
|
+
# @example
|
525
|
+
# dv = Daru::Vector.new [1, 2, 3], type: :category
|
526
|
+
# dv.category?
|
527
|
+
# # => true
|
528
|
+
def category?
|
529
|
+
type == :category
|
530
|
+
end
|
531
|
+
|
492
532
|
# Get index of element
|
493
533
|
def index_of element
|
494
534
|
case dtype
|
@@ -500,11 +540,9 @@ module Daru
|
|
500
540
|
# Keep only unique elements of the vector along with their indexes.
|
501
541
|
def uniq
|
502
542
|
uniq_vector = @data.uniq
|
503
|
-
new_index = uniq_vector.
|
504
|
-
acc << index_of(element)
|
505
|
-
end
|
543
|
+
new_index = uniq_vector.map { |element| index_of(element) }
|
506
544
|
|
507
|
-
Daru::Vector.new uniq_vector, name: @name,
|
545
|
+
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
508
546
|
end
|
509
547
|
|
510
548
|
def any? &block
|
@@ -531,47 +569,46 @@ module Daru
|
|
531
569
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
532
570
|
# # Say you want to sort these strings by length.
|
533
571
|
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
534
|
-
def sort opts={}
|
535
|
-
opts = {
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
vector_index = @data.each_with_index
|
540
|
-
vector_index =
|
541
|
-
if block_given?
|
542
|
-
vector_index.sort { |a,b| yield(a[0], b[0]) }
|
543
|
-
else
|
544
|
-
vector_index.sort { |(av, ai), (bv, bi)|
|
545
|
-
if !av.nil? && !bv.nil?
|
546
|
-
av <=> bv
|
547
|
-
elsif av.nil? && bv.nil?
|
548
|
-
ai <=> bi
|
549
|
-
elsif av.nil?
|
550
|
-
opts[:ascending] ? -1 : 1
|
551
|
-
else
|
552
|
-
opts[:ascending] ? 1 : -1
|
553
|
-
end
|
554
|
-
}
|
555
|
-
end
|
556
|
-
vector_index.reverse! unless opts[:ascending]
|
572
|
+
def sort opts={}, &block
|
573
|
+
opts = {ascending: true}.merge(opts)
|
574
|
+
|
575
|
+
vector_index = resort_index(@data.each_with_index, opts, &block)
|
557
576
|
vector, index = vector_index.transpose
|
558
|
-
old_index = @index.to_a
|
559
|
-
index = index.map { |i| old_index[i] }
|
560
577
|
|
561
|
-
|
578
|
+
index = @index.reorder index
|
579
|
+
|
580
|
+
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
581
|
+
end
|
582
|
+
|
583
|
+
DEFAULT_SORTER = lambda { |(lv, li), (rv, ri)|
|
584
|
+
case
|
585
|
+
when lv.nil? && rv.nil?
|
586
|
+
li <=> ri
|
587
|
+
when lv.nil?
|
588
|
+
-1
|
589
|
+
when rv.nil?
|
590
|
+
1
|
591
|
+
else
|
592
|
+
lv <=> rv
|
593
|
+
end
|
594
|
+
}
|
595
|
+
|
596
|
+
def resort_index vector_index, opts
|
597
|
+
if block_given?
|
598
|
+
vector_index.sort { |(lv, _li), (rv, _ri)| yield(lv, rv) }
|
599
|
+
else
|
600
|
+
vector_index.sort(&DEFAULT_SORTER)
|
601
|
+
end
|
602
|
+
.tap { |res| res.reverse! unless opts[:ascending] }
|
562
603
|
end
|
563
604
|
|
564
605
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
565
606
|
# Non-destructive.
|
607
|
+
# :nocov:
|
566
608
|
def sorted_data &block
|
567
609
|
@data.to_a.sort(&block)
|
568
610
|
end
|
569
|
-
|
570
|
-
# Returns *true* if the value passed is actually exists or is not marked as
|
571
|
-
# a *missing value*.
|
572
|
-
def exists? value
|
573
|
-
!@missing_values.key?(self[index_of(value)])
|
574
|
-
end
|
611
|
+
# :nocov:
|
575
612
|
|
576
613
|
# Like map, but returns a Daru::Vector with the returned values.
|
577
614
|
def recode dt=nil, &block
|
@@ -593,19 +630,12 @@ module Daru
|
|
593
630
|
def delete_if
  # Without a block, hand back an enumerator like the other iterators here.
  return to_enum(:delete_if) unless block_given?

  # each_with_index yields [value, index] pairs; keep those the block rejects.
  kept = each_with_index.reject { |n, _i| yield(n) }

  # [].transpose is [], which would destructure to nil, nil and hand nil to
  # cast_vector_to / Daru::Index.new when every element is deleted.
  keep_e, keep_i = kept.empty? ? [[], []] : kept.transpose

  @data = cast_vector_to @dtype, keep_e
  @index = Daru::Index.new(keep_i)

  update_position_cache

  self
end
|
@@ -614,32 +644,16 @@ module Daru
|
|
614
644
|
def keep_if
|
615
645
|
return to_enum(:keep_if) unless block_given?
|
616
646
|
|
617
|
-
|
618
|
-
keep_i = []
|
619
|
-
each_with_index do |n, i|
|
620
|
-
if yield(n)
|
621
|
-
keep_e << n
|
622
|
-
keep_i << i
|
623
|
-
end
|
624
|
-
end
|
625
|
-
|
626
|
-
@data = cast_vector_to @dtype, keep_e
|
627
|
-
@index = Daru::Index.new(keep_i)
|
628
|
-
set_missing_positions unless Daru.lazy_update
|
629
|
-
set_size
|
630
|
-
|
631
|
-
self
|
647
|
+
delete_if { |val| !yield(val) }
|
632
648
|
end
|
633
649
|
|
634
650
|
# Reports all values that don't comply with a condition.
|
635
651
|
# Returns a hash with the index of data and the invalid data.
|
636
652
|
def verify
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
h
|
653
|
+
(0...size)
|
654
|
+
.map { |i| [i, @data[i]] }
|
655
|
+
.reject { |_i, val| yield(val) }
|
656
|
+
.to_h
|
643
657
|
end
|
644
658
|
|
645
659
|
# Return an Array with the data split by a separator.
|
@@ -674,29 +688,19 @@ module Daru
|
|
674
688
|
#
|
675
689
|
def split_by_separator sep=','
|
676
690
|
split_data = splitted sep
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
out[f].push(nil)
|
685
|
-
end
|
686
|
-
else
|
687
|
-
factors.each do |f|
|
688
|
-
out[f].push(r.include?(f) ? 1 : 0)
|
689
|
-
end
|
690
|
-
end
|
691
|
-
end
|
692
|
-
|
693
|
-
out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
|
691
|
+
split_data
|
692
|
+
.flatten.uniq.compact.map do |key|
|
693
|
+
[
|
694
|
+
key,
|
695
|
+
Daru::Vector.new(split_data.map { |v| split_value(key, v) })
|
696
|
+
]
|
697
|
+
end.to_h
|
694
698
|
end
|
695
699
|
|
696
700
|
def split_by_separator_freq(sep=',')
|
697
|
-
split_by_separator(sep).map
|
698
|
-
[k, v.inject
|
699
|
-
|
701
|
+
split_by_separator(sep).map { |k, v|
|
702
|
+
[k, v.map(&:to_i).inject(:+)]
|
703
|
+
}.to_h
|
700
704
|
end
|
701
705
|
|
702
706
|
def reset_index!
|
@@ -718,23 +722,15 @@ module Daru
|
|
718
722
|
# # 1 false
|
719
723
|
# # 2 false
|
720
724
|
# # 3 true
|
725
|
+
#
|
721
726
|
def is_nil?
|
722
|
-
|
723
|
-
|
724
|
-
nil_truth_vector[idx] = self[idx].nil? ? true : false
|
725
|
-
end
|
726
|
-
|
727
|
-
nil_truth_vector
|
727
|
+
# FIXME: EXTREMELY bad name for method not returning boolean - zverok, 2016-05-18
|
728
|
+
recode(&:nil?)
|
728
729
|
end
|
729
730
|
|
730
731
|
# Opposite of #is_nil?
|
731
732
|
def not_nil?
|
732
|
-
|
733
|
-
@index.each do |idx|
|
734
|
-
nil_truth_vector[idx] = self[idx].nil? ? false : true
|
735
|
-
end
|
736
|
-
|
737
|
-
nil_truth_vector
|
733
|
+
recode { |v| !v.nil? }
|
738
734
|
end
|
739
735
|
|
740
736
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
@@ -744,7 +740,7 @@ module Daru
|
|
744
740
|
#
|
745
741
|
# * +replacement+ - The value which should replace all nils
|
746
742
|
def replace_nils! replacement
|
747
|
-
|
743
|
+
indexes(*Daru::MISSING_VALUES).each do |idx|
|
748
744
|
self[idx] = replacement
|
749
745
|
end
|
750
746
|
|
@@ -765,13 +761,13 @@ module Daru
|
|
765
761
|
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
766
762
|
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
767
763
|
def lag k=1
|
768
|
-
return dup if k
|
764
|
+
return dup if k.zero?
|
769
765
|
|
770
766
|
dat = @data.to_a.dup
|
771
767
|
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
772
768
|
(0...k).each { |i| dat[i] = nil }
|
773
769
|
|
774
|
-
Daru::Vector.new(dat, index: @index, name: @name
|
770
|
+
Daru::Vector.new(dat, index: @index, name: @name)
|
775
771
|
end
|
776
772
|
|
777
773
|
def detach_index
|
@@ -788,7 +784,19 @@ module Daru
|
|
788
784
|
|
789
785
|
# number of non-missing elements
|
790
786
|
def n_valid
|
791
|
-
|
787
|
+
size - indexes(*Daru::MISSING_VALUES).size
|
788
|
+
end
|
789
|
+
deprecate :n_valid, :count_values, 2016, 10
|
790
|
+
|
791
|
+
# Count the number of values specified
|
792
|
+
# @param [Array] *values values to count for
|
793
|
+
# @return [Integer] the number of times the values mentioned occurs
|
794
|
+
# @example
|
795
|
+
# dv = Daru::Vector.new [1, 2, 1, 2, 3, 4, nil, nil]
|
796
|
+
# dv.count_values nil
|
797
|
+
# # => 2
|
798
|
+
def count_values(*values)
|
799
|
+
positions(*values).size
|
792
800
|
end
|
793
801
|
|
794
802
|
# Returns *true* if an index exists
|
@@ -796,6 +804,11 @@ module Daru
|
|
796
804
|
@index.include? index
|
797
805
|
end
|
798
806
|
|
807
|
+
# @return [Daru::DataFrame] the vector as a single-vector dataframe
|
808
|
+
def to_df
|
809
|
+
Daru::DataFrame.new({@name => @data}, name: @name, index: @index)
|
810
|
+
end
|
811
|
+
|
799
812
|
# Convert Vector to a horizontal or vertical Ruby Matrix.
|
800
813
|
#
|
801
814
|
# == Arguments
|
@@ -811,11 +824,39 @@ module Daru
|
|
811
824
|
end
|
812
825
|
end
|
813
826
|
|
827
|
+
# Convert vector to nmatrix object
# @param [Symbol] axis :horizontal or :vertical
# @return [NMatrix] NMatrix object containing all values of the vector,
#   shaped [1, size] for :horizontal and [size, 1] for :vertical
# @raise [ArgumentError] if the vector is not numeric, contains nil, or
#   axis is neither :horizontal nor :vertical
# @example
#   dv = Daru::Vector.new [1, 2, 3]
#   dv.to_nmatrix
#   # =>
#   # [
#   #   [1, 2, 3] ]
def to_nmatrix axis=:horizontal
  # The old message ran the words together ("nmatrixbecause") and said the
  # vector *is* numeric, the opposite of the condition checked here.
  unless numeric? && !include?(nil)
    raise ArgumentError, 'Can not convert to nmatrix '\
      'because the vector is not numeric or contains nil'
  end

  case axis
  when :horizontal
    NMatrix.new [1, size], to_a
  when :vertical
    NMatrix.new [size, 1], to_a
  else
    raise ArgumentError, 'Invalid axis specified. '\
      'Valid axis are :horizontal and :vertical'
  end
end
|
850
|
+
|
814
851
|
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
815
852
|
# the stored GSL::Vector object.
|
816
853
|
def to_gsl
|
817
854
|
raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
|
818
|
-
dtype == :gsl
|
855
|
+
if dtype == :gsl
|
856
|
+
@data.data
|
857
|
+
else
|
858
|
+
GSL::Vector.alloc(reject_values(*Daru::MISSING_VALUES).to_a)
|
859
|
+
end
|
819
860
|
end
|
820
861
|
|
821
862
|
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
@@ -835,30 +876,12 @@ module Daru
|
|
835
876
|
|
836
877
|
# Convert to html for iruby
|
837
878
|
def to_html threshold=30
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
'</tr>'
|
845
|
-
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
846
|
-
@index.each_with_index do |index, num|
|
847
|
-
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
848
|
-
|
849
|
-
next if num <= threshold
|
850
|
-
html += '<tr><td>...</td><td>...</td></tr>'
|
851
|
-
|
852
|
-
last_index = @index.to_a.last
|
853
|
-
html += '<tr>' \
|
854
|
-
'<td>' + last_index.to_s + '</td>' \
|
855
|
-
'<td>' + self[last_index].to_s + '</td>' \
|
856
|
-
'</tr>'
|
857
|
-
break
|
858
|
-
end
|
859
|
-
html += '</table>'
|
860
|
-
|
861
|
-
html
|
879
|
+
path = if index.is_a?(MultiIndex)
|
880
|
+
File.expand_path('../iruby/templates/vector_mi.html.erb', __FILE__)
|
881
|
+
else
|
882
|
+
File.expand_path('../iruby/templates/vector.html.erb', __FILE__)
|
883
|
+
end
|
884
|
+
ERB.new(File.read(path).strip).result(binding)
|
862
885
|
end
|
863
886
|
|
864
887
|
def to_s
|
@@ -870,10 +893,11 @@ module Daru
|
|
870
893
|
ReportBuilder.new(no_title: true).add(self).send(method)
|
871
894
|
end
|
872
895
|
|
873
|
-
|
896
|
+
# :nocov:
|
897
|
+
def report_building b # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
874
898
|
b.section(name: name) do |s|
|
875
899
|
s.text "n :#{size}"
|
876
|
-
s.text "n valid:#{
|
900
|
+
s.text "n valid:#{count_values(*Daru::MISSING_VALUES)}"
|
877
901
|
if @type == :object
|
878
902
|
s.text "factors: #{factors.to_a.join(',')}"
|
879
903
|
s.text "mode: #{mode}"
|
@@ -881,7 +905,7 @@ module Daru
|
|
881
905
|
s.table(name: 'Distribution') do |t|
|
882
906
|
frequencies.sort_by(&:to_s).each do |k,v|
|
883
907
|
key = @index.include?(k) ? @index[k] : k
|
884
|
-
t.row [key, v, ('%0.2f%%' % (v.quo(
|
908
|
+
t.row [key, v, ('%0.2f%%' % (v.quo(count_values(*Daru::MISSING_VALUES))*100))]
|
885
909
|
end
|
886
910
|
end
|
887
911
|
end
|
@@ -898,47 +922,71 @@ module Daru
|
|
898
922
|
end
|
899
923
|
end
|
900
924
|
end
|
925
|
+
# :nocov:
|
901
926
|
|
902
927
|
# Overrides original inspect for pretty printing in irb
|
903
928
|
def inspect spacing=20, threshold=15
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
929
|
+
row_headers = index.is_a?(MultiIndex) ? index.sparse_tuples : index.to_a
|
930
|
+
|
931
|
+
"#<#{self.class}(#{size})#{':cataegory' if category?}>\n" +
|
932
|
+
Formatters::Table.format(
|
933
|
+
to_a.lazy.map { |v| [v] },
|
934
|
+
headers: @name && [@name],
|
935
|
+
row_headers: row_headers,
|
936
|
+
threshold: threshold,
|
937
|
+
spacing: spacing
|
938
|
+
)
|
939
|
+
end
|
940
|
+
|
941
|
+
# Sets new index for vector. Preserves index->value correspondence.
|
942
|
+
# Sets nil for new index keys absent from original index.
|
943
|
+
# @note Unlike #reorder! which takes positions as input it takes
|
944
|
+
# index as an input to reorder the vector
|
945
|
+
# @param [Daru::Index, Daru::MultiIndex] new_index new index to order with
|
946
|
+
# @return [Daru::Vector] vector reindexed with new index
|
947
|
+
def reindex! new_index
|
948
|
+
values = []
|
949
|
+
each_with_index do |val, i|
|
950
|
+
values[new_index[i]] = val if new_index.include?(i)
|
926
951
|
end
|
927
|
-
|
952
|
+
values.fill(nil, values.size, new_index.size - values.size)
|
953
|
+
|
954
|
+
@data = cast_vector_to @dtype, values
|
955
|
+
@index = new_index
|
928
956
|
|
929
|
-
|
957
|
+
update_position_cache
|
958
|
+
|
959
|
+
self
|
960
|
+
end
|
961
|
+
|
962
|
+
# Reorder the vector with given positions
|
963
|
+
# @note Unlike #reindex! which takes index as input, it takes
|
964
|
+
# positions as an input to reorder the vector
|
965
|
+
# @param [Array] order the order to reorder the vector with
|
966
|
+
# @return reordered vector
|
967
|
+
# @example
|
968
|
+
# dv = Daru::Vector.new [3, 2, 1], index: ['c', 'b', 'a']
|
969
|
+
# dv.reorder! [2, 1, 0]
|
970
|
+
# # => #<Daru::Vector(3)>
|
971
|
+
# # a 1
|
972
|
+
# # b 2
|
973
|
+
# # c 3
|
974
|
+
def reorder! order
|
975
|
+
@index = @index.reorder order
|
976
|
+
@data = order.map { |i| @data[i] }
|
977
|
+
update_position_cache
|
978
|
+
self
|
979
|
+
end
|
980
|
+
|
981
|
+
# Non-destructive version of #reorder!
|
982
|
+
def reorder order
|
983
|
+
dup.reorder! order
|
930
984
|
end
|
931
985
|
|
932
986
|
# Create a new vector with a different index, and preserve the indexing of
|
933
987
|
# current elements.
|
934
988
|
def reindex new_index
|
935
|
-
|
936
|
-
|
937
|
-
new_index.each do |idx|
|
938
|
-
vector[idx] = @index.include?(idx) ? self[idx] : nil
|
939
|
-
end
|
940
|
-
|
941
|
-
vector
|
989
|
+
dup.reindex!(new_index)
|
942
990
|
end
|
943
991
|
|
944
992
|
def index= idx
|
@@ -956,17 +1004,16 @@ module Daru
|
|
956
1004
|
#
|
957
1005
|
# @param new_name [Symbol] The new name.
|
958
1006
|
def rename new_name
|
959
|
-
if new_name.is_a?(Numeric)
|
960
|
-
@name = new_name
|
961
|
-
return
|
962
|
-
end
|
963
|
-
|
964
1007
|
@name = new_name
|
1008
|
+
self
|
965
1009
|
end
|
966
1010
|
|
967
|
-
|
1011
|
+
alias_method :name=, :rename
|
1012
|
+
|
1013
|
+
# Duplicates a vector
|
1014
|
+
# @return [Daru::Vector] duplicated vector
|
968
1015
|
def dup
|
969
|
-
Daru::Vector.new @data.dup, name: @name,
|
1016
|
+
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
970
1017
|
end
|
971
1018
|
|
972
1019
|
# == Bootstrap
|
@@ -1019,8 +1066,8 @@ module Daru
|
|
1019
1066
|
#
|
1020
1067
|
# == Reference:
|
1021
1068
|
# * Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
|
1022
|
-
def jackknife(estimators, k=1)
|
1023
|
-
raise "n should be divisible by k:#{k}" unless size % k
|
1069
|
+
def jackknife(estimators, k=1) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
|
1070
|
+
raise "n should be divisible by k:#{k}" unless (size % k).zero?
|
1024
1071
|
|
1025
1072
|
nb = (size / k).to_i
|
1026
1073
|
h_est, es, ps = prepare_bootstrap(estimators)
|
@@ -1057,62 +1104,123 @@ module Daru
|
|
1057
1104
|
# vector, setting this to false will return the same vector.
|
1058
1105
|
# Otherwise, a duplicate will be returned irrespective of
|
1059
1106
|
# presence of missing data.
|
1060
|
-
def only_valid as_a=:vector, duplicate=true
|
1061
|
-
return dup if !has_missing_data? && as_a == :vector && duplicate
|
1062
|
-
return self if !has_missing_data? && as_a == :vector && !duplicate
|
1063
|
-
return to_a if !has_missing_data? && as_a != :vector
|
1064
|
-
|
1065
|
-
new_index = @index.to_a - missing_positions
|
1066
|
-
new_vector = new_index.map do |idx|
|
1067
|
-
self[idx]
|
1068
|
-
end
|
1069
1107
|
|
1070
|
-
|
1108
|
+
def only_valid as_a=:vector, _duplicate=true
|
1109
|
+
# FIXME: Now duplicate is just ignored.
|
1110
|
+
# There are no specs that fail on this case, so I'll leave it
|
1111
|
+
# this way for now - zverok, 2016-05-07
|
1071
1112
|
|
1072
|
-
|
1113
|
+
new_index = @index.to_a - indexes(*Daru::MISSING_VALUES)
|
1114
|
+
new_vector = new_index.map { |idx| self[idx] }
|
1115
|
+
|
1116
|
+
if as_a == :vector
|
1117
|
+
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1118
|
+
else
|
1119
|
+
new_vector
|
1120
|
+
end
|
1121
|
+
end
|
1122
|
+
deprecate :only_valid, :reject_values, 2016, 10
|
1123
|
+
|
1124
|
+
# Return a vector with specified values removed
|
1125
|
+
# @param [Array] *values values to reject from resultant vector
|
1126
|
+
# @return [Daru::Vector] vector with specified values removed
|
1127
|
+
# @example
|
1128
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN]
|
1129
|
+
# dv.reject_values nil, Float::NAN
|
1130
|
+
# # => #<Daru::Vector(2)>
|
1131
|
+
# # 0 1
|
1132
|
+
# # 1 2
|
1133
|
+
def reject_values(*values)
|
1134
|
+
resultant_pos = size.times.to_a - positions(*values)
|
1135
|
+
dv = at(*resultant_pos)
|
1136
|
+
# Handle the case when number of positions is 1
|
1137
|
+
# and hence #at doesn't return a vector
|
1138
|
+
if dv.is_a?(Daru::Vector)
|
1139
|
+
dv
|
1140
|
+
else
|
1141
|
+
pos = resultant_pos.first
|
1142
|
+
at(pos..pos)
|
1143
|
+
end
|
1144
|
+
end
|
1145
|
+
|
1146
|
+
# Return indexes of values specified
|
1147
|
+
# @param [Array] *values values to find indexes for
|
1148
|
+
# @return [Array] array of indexes of values specified
|
1149
|
+
# @example
|
1150
|
+
# dv = Daru::Vector.new [1, 2, nil, Float::NAN], index: 11..14
|
1151
|
+
# dv.indexes nil, Float::NAN
|
1152
|
+
# # => [13, 14]
|
1153
|
+
def indexes(*values)
|
1154
|
+
index.to_a.values_at(*positions(*values))
|
1155
|
+
end
|
1156
|
+
|
1157
|
+
# Replaces specified values with a new value
|
1158
|
+
# @param [Array] old_values array of values to replace
|
1159
|
+
# @param [object] new_value new value to replace with
|
1160
|
+
# @note It performs the replace in place.
|
1161
|
+
# @return [Daru::Vector] Same vector itself with values
|
1162
|
+
# replaced with new value
|
1163
|
+
# @example
|
1164
|
+
# dv = Daru::Vector.new [1, 2, :a, :b]
|
1165
|
+
# dv.replace_values [:a, :b], nil
|
1166
|
+
# dv
|
1167
|
+
# # =>
|
1168
|
+
# # #<Daru::Vector:19903200 @name = nil @metadata = {} @size = 4 >
|
1169
|
+
# # nil
|
1170
|
+
# # 0 1
|
1171
|
+
# # 1 2
|
1172
|
+
# # 2 nil
|
1173
|
+
# # 3 nil
|
1174
|
+
def replace_values(old_values, new_value)
|
1175
|
+
old_values = [old_values] unless old_values.is_a? Array
|
1176
|
+
size.times do |pos|
|
1177
|
+
set_at([pos], new_value) if include_with_nan? old_values, at(pos)
|
1178
|
+
end
|
1179
|
+
self
|
1073
1180
|
end
|
1074
1181
|
|
1075
1182
|
# Returns a Vector containing only missing data (preserves indexes).
|
1076
1183
|
def only_missing as_a=:vector
|
1077
1184
|
if as_a == :vector
|
1078
|
-
self[*
|
1185
|
+
self[*indexes(*Daru::MISSING_VALUES)]
|
1079
1186
|
elsif as_a == :array
|
1080
|
-
self[*
|
1187
|
+
self[*indexes(*Daru::MISSING_VALUES)].to_a
|
1081
1188
|
end
|
1082
1189
|
end
|
1190
|
+
deprecate :only_missing, nil, 2016, 10
|
1083
1191
|
|
1084
1192
|
# Returns a Vector with only numerical data. Missing data is included
|
1085
1193
|
# but non-Numeric objects are excluded. Preserves index.
|
1086
1194
|
def only_numerics
|
1087
|
-
numeric_indexes =
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
end
|
1195
|
+
numeric_indexes =
|
1196
|
+
each_with_index
|
1197
|
+
.select { |v, _i| v.is_a?(Numeric) || v.nil? }
|
1198
|
+
.map(&:last)
|
1092
1199
|
|
1093
1200
|
self[*numeric_indexes]
|
1094
1201
|
end
|
1095
1202
|
|
1203
|
+
DATE_REGEXP = /^(\d{2}-\d{2}-\d{4}|\d{4}-\d{2}-\d{2})$/
|
1204
|
+
|
1096
1205
|
# Returns the database type for the vector, according to its content
|
1097
1206
|
def db_type
|
1098
1207
|
# first, detect any character not number
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
return 'DOUBLE'
|
1208
|
+
case
|
1209
|
+
when @data.any? { |v| v.to_s =~ DATE_REGEXP }
|
1210
|
+
'DATE'
|
1211
|
+
when @data.any? { |v| v.to_s =~ /[^0-9e.-]/ }
|
1212
|
+
'VARCHAR (255)'
|
1213
|
+
when @data.any? { |v| v.to_s =~ /\./ }
|
1214
|
+
'DOUBLE'
|
1107
1215
|
else
|
1108
|
-
|
1216
|
+
'INTEGER'
|
1109
1217
|
end
|
1110
1218
|
end
|
1111
1219
|
|
1112
1220
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
1113
1221
|
# values with nils.
|
1114
1222
|
def clone_structure
|
1115
|
-
Daru::Vector.new(([nil]
|
1223
|
+
Daru::Vector.new(([nil]*size), name: @name, index: @index.dup)
|
1116
1224
|
end
|
1117
1225
|
|
1118
1226
|
# Save the vector to a file
|
@@ -1129,38 +1237,156 @@ module Daru
|
|
1129
1237
|
data: @data.to_a,
|
1130
1238
|
dtype: @dtype,
|
1131
1239
|
name: @name,
|
1132
|
-
|
1133
|
-
index: @index,
|
1134
|
-
missing_values: @missing_values
|
1240
|
+
index: @index
|
1135
1241
|
)
|
1136
1242
|
end
|
1137
1243
|
|
1138
|
-
|
1139
|
-
h = Marshal.load(data)
|
1140
|
-
Daru::Vector.new(h[:data],
|
1141
|
-
index: h[:index],
|
1142
|
-
name: h[:name], metadata: h[:metadata],
|
1143
|
-
dtype: h[:dtype], missing_values: h[:missing_values])
|
1144
|
-
end
|
1145
|
-
|
1244
|
+
# :nocov:
|
1146
1245
|
def daru_vector(*)
|
1147
1246
|
self
|
1148
1247
|
end
|
1248
|
+
# :nocov:
|
1149
1249
|
|
1150
1250
|
alias :dv :daru_vector
|
1151
1251
|
|
1252
|
+
# Converts a non category type vector to category type vector.
|
1253
|
+
# @param [Hash] opts options to convert to category
|
1254
|
+
# @option opts [true, false] :ordered Specify if vector is ordered or not.
|
1255
|
+
# If it is ordered, it can be sorted and min, max like functions would work
|
1256
|
+
# @option opts [Array] :categories set categories in the specified order
|
1257
|
+
# @return [Daru::Vector] vector with type category
|
1258
|
+
def to_category opts={}
|
1259
|
+
dv = Daru::Vector.new to_a, type: :category, name: @name, index: @index
|
1260
|
+
dv.ordered = opts[:ordered] || false
|
1261
|
+
dv.categories = opts[:categories] if opts[:categories]
|
1262
|
+
dv
|
1263
|
+
end
|
1264
|
+
|
1152
1265
|
def method_missing(name, *args, &block)
|
1266
|
+
# FIXME: it is shamefully fragile. Should be either made stronger
|
1267
|
+
# (string/symbol dichotomy, informative errors) or removed totally. - zverok
|
1153
1268
|
if name =~ /(.+)\=/
|
1154
|
-
self[
|
1269
|
+
self[$1.to_sym] = args[0]
|
1155
1270
|
elsif has_index?(name)
|
1156
1271
|
self[name]
|
1157
1272
|
else
|
1158
|
-
super
|
1273
|
+
super
|
1274
|
+
end
|
1275
|
+
end
|
1276
|
+
|
1277
|
+
def respond_to_missing?(name, include_private=false)
|
1278
|
+
name.to_s.end_with?('=') || has_index?(name) || super
|
1279
|
+
end
|
1280
|
+
|
1281
|
+
# Partition a numeric variable into categories.
|
1282
|
+
# @param [Array<Numeric>] partitions an array whose consecutive elements
|
1283
|
+
# provide intervals for categories
|
1284
|
+
# @param [Hash] opts options to cut the partition
|
1285
|
+
# @option opts [:left, :right] :close_at specifies whether the interval closes at
|
1286
|
+
# the right side or left side
|
1287
|
+
# @option opts [Array] :labels names of the categories
|
1288
|
+
# @return [Daru::Vector] numeric variable converted to categorical variable
|
1289
|
+
# @example
|
1290
|
+
# heights = Daru::Vector.new [30, 35, 32, 50, 42, 51]
|
1291
|
+
# height_cat = heights.cut [30, 40, 50, 60], labels: ['low', 'medium', 'high']
|
1292
|
+
# # => #<Daru::Vector(6)>
|
1293
|
+
# # 0 low
|
1294
|
+
# # 1 low
|
1295
|
+
# # 2 low
|
1296
|
+
# # 3 high
|
1297
|
+
# # 4 medium
|
1298
|
+
# # 5 high
|
1299
|
+
def cut partitions, opts={}
|
1300
|
+
close_at, labels = opts[:close_at] || :right, opts[:labels]
|
1301
|
+
partitions = partitions.to_a
|
1302
|
+
values = to_a.map { |val| cut_find_category partitions, val, close_at }
|
1303
|
+
cats = cut_categories(partitions, close_at)
|
1304
|
+
|
1305
|
+
dv = Daru::Vector.new values,
|
1306
|
+
index: @index,
|
1307
|
+
type: :category,
|
1308
|
+
categories: cats
|
1309
|
+
|
1310
|
+
# Rename categories if new labels provided
|
1311
|
+
if labels
|
1312
|
+
dv.rename_categories Hash[cats.zip(labels)]
|
1313
|
+
else
|
1314
|
+
dv
|
1315
|
+
end
|
1316
|
+
end
|
1317
|
+
|
1318
|
+
def positions(*values)
|
1319
|
+
case values
|
1320
|
+
when [nil]
|
1321
|
+
nil_positions
|
1322
|
+
when [Float::NAN]
|
1323
|
+
nan_positions
|
1324
|
+
when [nil, Float::NAN], [Float::NAN, nil]
|
1325
|
+
nil_positions + nan_positions
|
1326
|
+
else
|
1327
|
+
size.times.select { |i| include_with_nan? values, @data[i] }
|
1159
1328
|
end
|
1160
1329
|
end
|
1161
1330
|
|
1162
1331
|
private
|
1163
1332
|
|
1333
|
+
def nil_positions
|
1334
|
+
@nil_positions ||
|
1335
|
+
@nil_positions = size.times.select { |i| @data[i].nil? }
|
1336
|
+
end
|
1337
|
+
|
1338
|
+
def nan_positions
|
1339
|
+
@nan_positions ||
|
1340
|
+
@nan_positions = size.times.select do |i|
|
1341
|
+
@data[i].respond_to?(:nan?) && @data[i].nan?
|
1342
|
+
end
|
1343
|
+
end
|
1344
|
+
|
1345
|
+
def initialize_vector source, opts
|
1346
|
+
index, source = parse_source(source, opts)
|
1347
|
+
set_name opts[:name]
|
1348
|
+
|
1349
|
+
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
1350
|
+
@index = Index.coerce(index || @data.size)
|
1351
|
+
|
1352
|
+
guard_sizes!
|
1353
|
+
|
1354
|
+
@possibly_changed_type = true
|
1355
|
+
# Include plotting functionality
|
1356
|
+
self.plotting_library = Daru.plotting_library
|
1357
|
+
end
|
1358
|
+
|
1359
|
+
def parse_source source, opts
|
1360
|
+
if source.is_a?(Hash)
|
1361
|
+
[source.keys, source.values]
|
1362
|
+
else
|
1363
|
+
[opts[:index], source || []]
|
1364
|
+
end
|
1365
|
+
end
|
1366
|
+
|
1367
|
+
def guard_sizes!
|
1368
|
+
if @index.size > @data.size
|
1369
|
+
cast(dtype: :array) # NM with nils seg faults
|
1370
|
+
@data.fill(nil, @data.size...@index.size)
|
1371
|
+
elsif @index.size < @data.size
|
1372
|
+
raise IndexError, "Expected index size >= vector size. Index size : #{@index.size}, vector size : #{@data.size}"
|
1373
|
+
end
|
1374
|
+
end
|
1375
|
+
|
1376
|
+
def guard_type_check value
|
1377
|
+
@possibly_changed_type = true \
|
1378
|
+
if object? && (value.nil? || value.is_a?(Numeric)) ||
|
1379
|
+
numeric? && !value.is_a?(Numeric) && !value.nil?
|
1380
|
+
end
|
1381
|
+
|
1382
|
+
def split_value key, v
|
1383
|
+
case
|
1384
|
+
when v.nil? then nil
|
1385
|
+
when v.include?(key) then 1
|
1386
|
+
else 0
|
1387
|
+
end
|
1388
|
+
end
|
1389
|
+
|
1164
1390
|
# For an array or hash of estimators methods, returns
|
1165
1391
|
# an array with three elements
|
1166
1392
|
# 1.- A hash with estimators names as keys and lambdas as values
|
@@ -1180,18 +1406,6 @@ module Daru
|
|
1180
1406
|
[h_est, h_est.keys, bss]
|
1181
1407
|
end
|
1182
1408
|
|
1183
|
-
def keep? a, b, order
|
1184
|
-
eval = yield(a, b)
|
1185
|
-
if order == :ascending
|
1186
|
-
return true if eval == -1
|
1187
|
-
return false if eval == 1
|
1188
|
-
elsif order == :descending
|
1189
|
-
return false if eval == -1
|
1190
|
-
return true if eval == 1
|
1191
|
-
end
|
1192
|
-
false
|
1193
|
-
end
|
1194
|
-
|
1195
1409
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
1196
1410
|
# @dtype variable is set and the underlying data type of vector changed.
|
1197
1411
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
@@ -1203,25 +1417,13 @@ module Daru
|
|
1203
1417
|
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1204
1418
|
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
1205
1419
|
when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
|
1206
|
-
else raise "Unknown dtype #{dtype}"
|
1420
|
+
else raise ArgumentError, "Unknown dtype #{dtype}"
|
1207
1421
|
end
|
1208
1422
|
|
1209
1423
|
@dtype = dtype || :array
|
1210
1424
|
new_vector
|
1211
1425
|
end
|
1212
1426
|
|
1213
|
-
def index_for index
|
1214
|
-
if @index.include?(index)
|
1215
|
-
@index[index]
|
1216
|
-
elsif index.is_a?(Numeric)
|
1217
|
-
index
|
1218
|
-
end
|
1219
|
-
end
|
1220
|
-
|
1221
|
-
def set_size
|
1222
|
-
@size = @data.size
|
1223
|
-
end
|
1224
|
-
|
1225
1427
|
def set_name name # rubocop:disable Style/AccessorMethodName
|
1226
1428
|
@name =
|
1227
1429
|
if name.is_a?(Numeric) then name
|
@@ -1232,38 +1434,109 @@ module Daru
|
|
1232
1434
|
end
|
1233
1435
|
end
|
1234
1436
|
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1437
|
+
# Raises IndexError when one of the positions is an invalid position
|
1438
|
+
def validate_positions *positions
|
1439
|
+
positions = [positions] if positions.is_a? Integer
|
1440
|
+
positions.each do |pos|
|
1441
|
+
raise IndexError, "#{pos} is not a valid position." if pos >= size
|
1239
1442
|
end
|
1240
1443
|
end
|
1241
1444
|
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1445
|
+
# coerce ranges, integers and array in appropriate ways
|
1446
|
+
def coerce_positions *positions
|
1447
|
+
if positions.size == 1
|
1448
|
+
case positions.first
|
1449
|
+
when Integer
|
1450
|
+
positions.first
|
1451
|
+
when Range
|
1452
|
+
size.times.to_a[positions.first]
|
1453
|
+
else
|
1454
|
+
raise ArgumentError, 'Unkown position type.'
|
1455
|
+
end
|
1245
1456
|
else
|
1246
|
-
|
1457
|
+
positions
|
1247
1458
|
end
|
1248
1459
|
end
|
1249
1460
|
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1461
|
+
# Helper method for []=.
|
1462
|
+
# Assigns existing index to another value
|
1463
|
+
def modify_vector(indexes, val)
|
1464
|
+
positions = @index.pos(*indexes)
|
1465
|
+
|
1466
|
+
if positions.is_a? Numeric
|
1467
|
+
@data[positions] = val
|
1468
|
+
else
|
1469
|
+
positions.each { |pos| @data[pos] = val }
|
1470
|
+
end
|
1253
1471
|
end
|
1254
1472
|
|
1255
|
-
#
|
1256
|
-
#
|
1257
|
-
def
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1473
|
+
# Helper method for []=.
|
1474
|
+
# Add a new index and assign it value
|
1475
|
+
def insert_vector(indexes, val)
|
1476
|
+
new_index = @index.add(*indexes)
|
1477
|
+
# May be create +=
|
1478
|
+
(new_index.size - @index.size).times { @data << val }
|
1479
|
+
@index = new_index
|
1480
|
+
end
|
1481
|
+
|
1482
|
+
# Works similar to #[]= but also insert the vector in case index is not valid
|
1483
|
+
# It is there only to be accessed by Daru::DataFrame and not meant for user.
|
1484
|
+
def set indexes, val
|
1485
|
+
cast(dtype: :array) if val.nil? && dtype != :array
|
1486
|
+
guard_type_check(val)
|
1487
|
+
|
1488
|
+
if @index.valid?(*indexes)
|
1489
|
+
modify_vector(indexes, val)
|
1490
|
+
else
|
1491
|
+
insert_vector(indexes, val)
|
1492
|
+
end
|
1493
|
+
|
1494
|
+
update_position_cache
|
1495
|
+
end
|
1496
|
+
|
1497
|
+
def cut_find_category partitions, val, close_at
|
1498
|
+
case close_at
|
1499
|
+
when :right
|
1500
|
+
right_index = partitions.index { |i| i > val }
|
1501
|
+
raise ArgumentError, 'Invalid partition' if right_index.nil?
|
1502
|
+
left_index = right_index - 1
|
1503
|
+
"#{partitions[left_index]}-#{partitions[right_index]-1}"
|
1504
|
+
when :left
|
1505
|
+
right_index = partitions.index { |i| i >= val }
|
1506
|
+
raise ArgumentError, 'Invalid partition' if right_index.nil?
|
1507
|
+
left_index = right_index - 1
|
1508
|
+
"#{partitions[left_index]+1}-#{partitions[right_index]}"
|
1509
|
+
else
|
1510
|
+
raise ArgumentError, "Invalid parameter #{close_at} to close_at."
|
1511
|
+
end
|
1512
|
+
end
|
1513
|
+
|
1514
|
+
def cut_categories partitions, close_at
|
1515
|
+
case close_at
|
1516
|
+
when :right
|
1517
|
+
Array.new(partitions.size-1) do |left_index|
|
1518
|
+
"#{partitions[left_index]}-#{partitions[left_index+1]-1}"
|
1519
|
+
end
|
1520
|
+
when :left
|
1521
|
+
Array.new(partitions.size-1) do |left_index|
|
1522
|
+
"#{partitions[left_index]+1}-#{partitions[left_index+1]}"
|
1265
1523
|
end
|
1266
1524
|
end
|
1267
1525
|
end
|
1526
|
+
|
1527
|
+
def include_with_nan? array, value
|
1528
|
+
# Returns true if value is included in array.
|
1529
|
+
# Similar to include? but also works if value is Float::NAN
|
1530
|
+
if value.respond_to?(:nan?) && value.nan?
|
1531
|
+
array.any? { |i| i.respond_to?(:nan?) && i.nan? }
|
1532
|
+
else
|
1533
|
+
array.include? value
|
1534
|
+
end
|
1535
|
+
end
|
1536
|
+
|
1537
|
+
def update_position_cache
|
1538
|
+
@nil_positions = nil
|
1539
|
+
@nan_positions = nil
|
1540
|
+
end
|
1268
1541
|
end
|
1269
1542
|
end
|