daru 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/monkeys.rb
CHANGED
@@ -6,35 +6,23 @@ class Array
|
|
6
6
|
# a.recode_repeated
|
7
7
|
# => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
|
8
8
|
def recode_repeated
|
9
|
-
if size
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
acc[v] = 1
|
14
|
-
else
|
15
|
-
acc[v] += 1
|
16
|
-
end
|
17
|
-
acc
|
18
|
-
end.select { |_k, v| v > 1 }.keys
|
19
|
-
|
20
|
-
ns = repeated.inject({}) do |acc, v|
|
21
|
-
acc[v] = 0
|
22
|
-
acc
|
23
|
-
end
|
9
|
+
return self if size == uniq.size
|
10
|
+
|
11
|
+
duplicated = group_by { |n| n }
|
12
|
+
.select { |_, g| g.size > 1 }.map(&:first)
|
24
13
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
14
|
+
counter = duplicated.collect { |n| [n, 0] }.to_h
|
15
|
+
|
16
|
+
collect do |n|
|
17
|
+
if counter.key?(n)
|
18
|
+
counter[n] += 1
|
19
|
+
'%s_%d' % [n, counter[n]]
|
20
|
+
else
|
21
|
+
n
|
32
22
|
end
|
33
|
-
else
|
34
|
-
self
|
35
23
|
end
|
36
24
|
end
|
37
|
-
|
25
|
+
|
38
26
|
def daru_vector name=nil, index=nil, dtype=:array
|
39
27
|
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
40
28
|
end
|
@@ -54,20 +42,20 @@ class Range
|
|
54
42
|
alias_method :dv, :daru_vector
|
55
43
|
|
56
44
|
def to_index
|
57
|
-
Daru::Index.new
|
45
|
+
Daru::Index.new to_a
|
58
46
|
end
|
59
47
|
end
|
60
48
|
|
61
49
|
class Hash
|
62
50
|
def daru_vector index=nil, dtype=:array
|
63
|
-
Daru::Vector.new
|
51
|
+
Daru::Vector.new values[0], name: keys[0], index: index, dtype: dtype
|
64
52
|
end
|
65
53
|
|
66
54
|
alias_method :dv, :daru_vector
|
67
55
|
end
|
68
56
|
|
69
57
|
class NMatrix
|
70
|
-
def daru_vector
|
58
|
+
def daru_vector(name=nil, index=nil, *)
|
71
59
|
Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
|
72
60
|
end
|
73
61
|
|
@@ -75,7 +63,7 @@ class NMatrix
|
|
75
63
|
end
|
76
64
|
|
77
65
|
class MDArray
|
78
|
-
def daru_vector
|
66
|
+
def daru_vector(name=nil, index=nil, *)
|
79
67
|
Daru::Vector.new self, name: name, index: index, dtype: :mdarray
|
80
68
|
end
|
81
69
|
|
@@ -90,7 +78,7 @@ end
|
|
90
78
|
|
91
79
|
class Matrix
|
92
80
|
def elementwise_division other
|
93
|
-
|
81
|
+
map.with_index do |e, index|
|
94
82
|
e / other.to_a.flatten[index]
|
95
83
|
end
|
96
84
|
end
|
@@ -104,4 +92,9 @@ class String
|
|
104
92
|
false
|
105
93
|
end
|
106
94
|
end
|
95
|
+
end
|
96
|
+
|
97
|
+
class Daru::DataFrame
|
98
|
+
# NOTE: This alias will soon be removed. Use to_h in all future work.
|
99
|
+
alias :to_hash :to_h
|
107
100
|
end
|
@@ -1,20 +1,20 @@
|
|
1
1
|
module Daru
|
2
2
|
module Plotting
|
3
3
|
module DataFrame
|
4
|
-
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
4
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
5
5
|
# the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
|
6
6
|
# to the block, if it is specified. See the nyaplot docs for info on how to
|
7
7
|
# further use these objects.
|
8
|
-
#
|
9
|
-
# Detailed instructions on use of the plotting API can be found in the
|
8
|
+
#
|
9
|
+
# Detailed instructions on use of the plotting API can be found in the
|
10
10
|
# notebooks whose links you can find in the README.
|
11
|
-
#
|
11
|
+
#
|
12
12
|
# == Options
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
|
15
15
|
# * +:x+ - Vector to be used for X co-ordinates.
|
16
16
|
# * +:y+ - Vector to be used for Y co-ordinates.
|
17
|
-
#
|
17
|
+
#
|
18
18
|
# == Usage
|
19
19
|
# # Simple bar chart
|
20
20
|
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
@@ -28,27 +28,28 @@ module Daru
|
|
28
28
|
types = extract_option :type, options
|
29
29
|
|
30
30
|
diagram =
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
case
|
32
|
+
when !([:scatter, :bar, :line, :histogram] & types).empty?
|
33
|
+
if single_diagram? options
|
34
|
+
add_single_diagram plot, options
|
35
|
+
else
|
36
|
+
add_multiple_diagrams plot, options
|
37
|
+
end
|
38
|
+
when types.include?(:box)
|
39
|
+
numeric = only_numerics(clone: false).dup_only_valid
|
40
|
+
|
41
|
+
plot.add_with_df(
|
42
|
+
numeric.to_nyaplotdf,
|
43
|
+
:box, *numeric.vectors.to_a
|
44
|
+
)
|
37
45
|
end
|
38
|
-
when types.include?(:box)
|
39
|
-
numeric = self.only_numerics(clone: false).dup_only_valid
|
40
|
-
|
41
|
-
plot.add_with_df(
|
42
|
-
numeric.to_nyaplotdf,
|
43
|
-
:box, *numeric.vectors.to_a)
|
44
|
-
end
|
45
46
|
|
46
47
|
yield(plot, diagram) if block_given?
|
47
48
|
|
48
49
|
plot.show
|
49
50
|
end
|
50
51
|
|
51
|
-
|
52
|
+
private
|
52
53
|
|
53
54
|
def single_diagram? options
|
54
55
|
options[:x] and options[:x].is_a?(Symbol)
|
@@ -56,12 +57,12 @@ module Daru
|
|
56
57
|
|
57
58
|
def add_single_diagram plot, options
|
58
59
|
args = [
|
59
|
-
|
60
|
-
options[:type],
|
60
|
+
to_nyaplotdf,
|
61
|
+
options[:type],
|
61
62
|
options[:x]
|
62
63
|
]
|
63
64
|
|
64
|
-
args << options[:y] if
|
65
|
+
args << options[:y] if options[:y]
|
65
66
|
|
66
67
|
plot.add_with_df(*args)
|
67
68
|
end
|
@@ -72,11 +73,10 @@ module Daru
|
|
72
73
|
y_vecs = extract_option :y, options
|
73
74
|
|
74
75
|
diagrams = []
|
75
|
-
nyaplot_df =
|
76
|
+
nyaplot_df = to_nyaplotdf
|
76
77
|
total = x_vecs.size
|
77
78
|
types = types.size < total ? types*total : types
|
78
79
|
|
79
|
-
|
80
80
|
(0...total).each do |i|
|
81
81
|
diagrams << plot.add_with_df(
|
82
82
|
nyaplot_df,
|
@@ -95,11 +95,10 @@ module Daru
|
|
95
95
|
o.is_a?(Array) ? o : [o]
|
96
96
|
else
|
97
97
|
arr = options.keys
|
98
|
-
arr.keep_if { |a| a =~ Regexp.new("\\A#{opt
|
98
|
+
arr.keep_if { |a| a =~ Regexp.new("\\A#{opt}") }.sort
|
99
99
|
arr.map { |a| options[a] }
|
100
100
|
end
|
101
101
|
end
|
102
|
-
|
103
102
|
end
|
104
103
|
end
|
105
|
-
end if Daru.has_nyaplot?
|
104
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
module Daru
|
2
2
|
module Plotting
|
3
3
|
module Vector
|
4
|
-
|
5
4
|
# Plots a Vector with Nyaplot on IRuby using the given options. Yields the
|
6
|
-
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
7
|
-
# to the block, which can be used for setting various options as per the
|
5
|
+
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
6
|
+
# to the block, which can be used for setting various options as per the
|
8
7
|
# Nyaplot API.
|
9
|
-
#
|
8
|
+
#
|
10
9
|
# == Options
|
11
10
|
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
12
|
-
#
|
11
|
+
#
|
13
12
|
# == Usage
|
14
13
|
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
15
14
|
# vector.plot(type: :bar) do |plot|
|
16
15
|
# plot.title "My first plot"
|
17
16
|
# plot.width 1200
|
18
17
|
# end
|
19
|
-
def plot opts={}
|
18
|
+
def plot opts={}
|
20
19
|
options = {
|
21
20
|
type: :scatter
|
22
21
|
}.merge(opts)
|
@@ -24,16 +23,16 @@ module Daru
|
|
24
23
|
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
25
24
|
plot = Nyaplot::Plot.new
|
26
25
|
diagram =
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
if [:box, :histogram].include? options[:type]
|
27
|
+
plot.add(options[:type], @data.to_a)
|
28
|
+
else
|
29
|
+
plot.add(options[:type], x_axis, @data.to_a)
|
30
|
+
end
|
32
31
|
|
33
32
|
yield plot, diagram if block_given?
|
34
|
-
|
33
|
+
|
35
34
|
plot.show
|
36
35
|
end
|
37
36
|
end
|
38
37
|
end
|
39
|
-
end if Daru.has_nyaplot?
|
38
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/vector.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require 'accessors/
|
7
|
-
require 'accessors/nmatrix_wrapper.rb'
|
8
|
-
require 'accessors/gsl_wrapper.rb'
|
1
|
+
require 'daru/maths/arithmetic/vector.rb'
|
2
|
+
require 'daru/maths/statistics/vector.rb'
|
3
|
+
require 'daru/plotting/vector.rb'
|
4
|
+
require 'daru/accessors/array_wrapper.rb'
|
5
|
+
require 'daru/accessors/nmatrix_wrapper.rb'
|
6
|
+
require 'daru/accessors/gsl_wrapper.rb'
|
9
7
|
|
10
8
|
module Daru
|
11
9
|
class Vector
|
@@ -28,10 +26,10 @@ module Daru
|
|
28
26
|
self
|
29
27
|
end
|
30
28
|
|
31
|
-
def each_with_index
|
29
|
+
def each_with_index
|
32
30
|
return to_enum(:each_with_index) unless block_given?
|
33
31
|
|
34
|
-
@index.each { |i|
|
32
|
+
@index.each { |i| yield(self[i], i) }
|
35
33
|
self
|
36
34
|
end
|
37
35
|
|
@@ -59,6 +57,10 @@ module Daru
|
|
59
57
|
# Store a hash of labels for values. Supplementary only. Recommend using index
|
60
58
|
# for proper usage.
|
61
59
|
attr_accessor :labels
|
60
|
+
# Store vector data in an array
|
61
|
+
attr_reader :data
|
62
|
+
# Attach arbitrary metadata to vector (usually a hash)
|
63
|
+
attr_accessor :metadata
|
62
64
|
|
63
65
|
# Create a Vector object.
|
64
66
|
#
|
@@ -97,11 +99,13 @@ module Daru
|
|
97
99
|
source = source.values
|
98
100
|
else
|
99
101
|
index = opts[:index]
|
100
|
-
source
|
102
|
+
source ||= []
|
101
103
|
end
|
102
|
-
name
|
104
|
+
name = opts[:name]
|
103
105
|
set_name name
|
104
106
|
|
107
|
+
@metadata = opts[:metadata] || {}
|
108
|
+
|
105
109
|
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
106
110
|
@index = try_create_index(index || @data.size)
|
107
111
|
|
@@ -137,11 +141,10 @@ module Daru
|
|
137
141
|
value = opts[:value]
|
138
142
|
opts.delete :value
|
139
143
|
if block
|
140
|
-
|
144
|
+
Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
|
141
145
|
else
|
142
|
-
|
146
|
+
Daru::Vector.new Array.new(n) { value }, opts
|
143
147
|
end
|
144
|
-
vector
|
145
148
|
end
|
146
149
|
|
147
150
|
# Create a vector using (almost) any object
|
@@ -180,7 +183,7 @@ module Daru
|
|
180
183
|
when Daru::Vector
|
181
184
|
values.concat a.to_a
|
182
185
|
when Range
|
183
|
-
values.concat
|
186
|
+
values.concat a.to_a
|
184
187
|
else
|
185
188
|
values << a
|
186
189
|
end
|
@@ -199,56 +202,18 @@ module Daru
|
|
199
202
|
#
|
200
203
|
# # For vectors employing hierarchical multi index
|
201
204
|
#
|
202
|
-
def [](*
|
203
|
-
|
204
|
-
|
205
|
-
sub_index = @index[indexes]
|
206
|
-
result =
|
207
|
-
if sub_index.is_a?(Integer)
|
208
|
-
@data[sub_index]
|
209
|
-
else
|
210
|
-
elements = sub_index.map do |tuple|
|
211
|
-
@data[@index[tuple]]
|
212
|
-
end
|
205
|
+
def [](*input_indexes)
|
206
|
+
# Get a proper index object
|
207
|
+
indexes = @index[*input_indexes]
|
213
208
|
|
214
|
-
|
215
|
-
|
216
|
-
end
|
217
|
-
Daru::Vector.new(
|
218
|
-
elements, index: sub_index, name: @name, dtype: @dtype)
|
219
|
-
end
|
209
|
+
# If one object is asked return it
|
210
|
+
return @data[indexes] if indexes.is_a? Numeric
|
220
211
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
unless indexes[1]
|
227
|
-
case location
|
228
|
-
when Range
|
229
|
-
first = location.first
|
230
|
-
last = location.last
|
231
|
-
indexes = @index.slice first, last
|
232
|
-
else
|
233
|
-
pos = @index[location]
|
234
|
-
if pos.is_a?(Numeric)
|
235
|
-
return @data[pos]
|
236
|
-
else
|
237
|
-
indexes = pos
|
238
|
-
end
|
239
|
-
end
|
240
|
-
else
|
241
|
-
indexes = indexes.map { |e| named_index_for(e) }
|
242
|
-
end
|
243
|
-
|
244
|
-
begin
|
245
|
-
Daru::Vector.new(
|
246
|
-
indexes.map { |loc| @data[@index[loc]] },
|
247
|
-
name: @name, index: indexes, dtype: @dtype)
|
248
|
-
rescue NoMethodError
|
249
|
-
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
250
|
-
end
|
251
|
-
end
|
212
|
+
# Form a new Vector using indexes and return it
|
213
|
+
Daru::Vector.new(
|
214
|
+
indexes.map { |loc| @data[@index[loc]] },
|
215
|
+
name: @name, metadata: @metadata.dup, index: indexes.conform(input_indexes), dtype: @dtype
|
216
|
+
)
|
252
217
|
end
|
253
218
|
|
254
219
|
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
@@ -265,15 +230,14 @@ module Daru
|
|
265
230
|
# # b 2
|
266
231
|
# # c 3
|
267
232
|
def []=(*location, value)
|
268
|
-
cast(dtype: :array) if value.nil?
|
233
|
+
cast(dtype: :array) if value.nil? && dtype != :array
|
269
234
|
|
270
|
-
@possibly_changed_type = true if @type == :object
|
235
|
+
@possibly_changed_type = true if @type == :object && (value.nil? ||
|
271
236
|
value.is_a?(Numeric))
|
272
|
-
@possibly_changed_type = true if @type == :numeric
|
237
|
+
@possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
|
273
238
|
!value.nil?)
|
274
239
|
|
275
|
-
|
276
|
-
pos = @index[location]
|
240
|
+
pos = @index[*location]
|
277
241
|
|
278
242
|
if pos.is_a?(Numeric)
|
279
243
|
@data[pos] = value
|
@@ -315,9 +279,7 @@ module Daru
|
|
315
279
|
# assignment/deletion of elements is done. Updating data this way is called
|
316
280
|
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
317
281
|
def update
|
318
|
-
|
319
|
-
set_missing_positions
|
320
|
-
end
|
282
|
+
Daru.lazy_update and set_missing_positions
|
321
283
|
end
|
322
284
|
|
323
285
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -325,10 +287,8 @@ module Daru
|
|
325
287
|
def == other
|
326
288
|
case other
|
327
289
|
when Daru::Vector
|
328
|
-
@index == other.index
|
329
|
-
|
330
|
-
self[index] == other[index]
|
331
|
-
end
|
290
|
+
@index == other.index && @size == other.size &&
|
291
|
+
@index.all? { |index| self[index] == other[index] }
|
332
292
|
else
|
333
293
|
super
|
334
294
|
end
|
@@ -369,12 +329,12 @@ module Daru
|
|
369
329
|
# written above for functionality of each method. Use these methods with the
|
370
330
|
# `where` method to obtain the corresponding Vector/DataFrame.
|
371
331
|
{
|
372
|
-
:
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
376
|
-
:
|
377
|
-
:
|
332
|
+
eq: :==,
|
333
|
+
not_eq: :!=,
|
334
|
+
lt: :<,
|
335
|
+
lteq: :<=,
|
336
|
+
mt: :>,
|
337
|
+
mteq: :>=
|
378
338
|
}.each do |method, operator|
|
379
339
|
define_method(method) do |other|
|
380
340
|
mod = Daru::Core::Query
|
@@ -403,9 +363,8 @@ module Daru
|
|
403
363
|
def in other
|
404
364
|
other = Hash[other.zip(Array.new(other.size, 0))]
|
405
365
|
Daru::Core::Query::BoolArray.new(
|
406
|
-
@data.
|
407
|
-
memo << (other.
|
408
|
-
memo
|
366
|
+
@data.each_with_object([]) do |d, memo|
|
367
|
+
memo << (other.key?(d) ? true : false)
|
409
368
|
end
|
410
369
|
)
|
411
370
|
end
|
@@ -447,7 +406,7 @@ module Daru
|
|
447
406
|
# # 13 5
|
448
407
|
# # 15 1
|
449
408
|
def where bool_arry
|
450
|
-
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry,
|
409
|
+
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, dtype
|
451
410
|
end
|
452
411
|
|
453
412
|
def head q=10
|
@@ -458,18 +417,21 @@ module Daru
|
|
458
417
|
self[(@size - q)..(@size-1)]
|
459
418
|
end
|
460
419
|
|
420
|
+
def empty?
|
421
|
+
@index.empty?
|
422
|
+
end
|
423
|
+
|
461
424
|
# Reports whether missing data is present in the Vector.
|
462
425
|
def has_missing_data?
|
463
426
|
!missing_positions.empty?
|
464
427
|
end
|
465
428
|
alias :flawed? :has_missing_data?
|
466
429
|
|
467
|
-
|
468
430
|
# Append an element to the vector by specifying the element and index
|
469
431
|
def concat element, index
|
470
|
-
raise IndexError,
|
432
|
+
raise IndexError, 'Expected new unique index' if @index.include? index
|
471
433
|
|
472
|
-
@index
|
434
|
+
@index |= [index]
|
473
435
|
@data[@index[index]] = element
|
474
436
|
|
475
437
|
set_size
|
@@ -486,14 +448,14 @@ module Daru
|
|
486
448
|
def cast opts={}
|
487
449
|
dt = opts[:dtype]
|
488
450
|
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
489
|
-
dt == :array
|
451
|
+
dt == :array || dt == :nmatrix || dt == :gsl
|
490
452
|
|
491
453
|
@data = cast_vector_to dt unless @dtype == dt
|
492
454
|
end
|
493
455
|
|
494
456
|
# Delete an element by value
|
495
457
|
def delete element
|
496
|
-
|
458
|
+
delete_at index_of(element)
|
497
459
|
end
|
498
460
|
|
499
461
|
# Delete element by index
|
@@ -514,15 +476,12 @@ module Daru
|
|
514
476
|
def type
|
515
477
|
return @data.nm_dtype if dtype == :nmatrix
|
516
478
|
|
517
|
-
if @type.nil?
|
479
|
+
if @type.nil? || @possibly_changed_type
|
518
480
|
@type = :numeric
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
break
|
524
|
-
end
|
525
|
-
end
|
481
|
+
each do |e|
|
482
|
+
next if e.nil? || e.is_a?(Numeric)
|
483
|
+
@type = :object
|
484
|
+
break
|
526
485
|
end
|
527
486
|
@possibly_changed_type = false
|
528
487
|
end
|
@@ -532,18 +491,20 @@ module Daru
|
|
532
491
|
|
533
492
|
# Get index of element
|
534
493
|
def index_of element
|
535
|
-
|
494
|
+
case dtype
|
495
|
+
when :array then @index.key @data.index { |x| x.eql? element }
|
496
|
+
else @index.key @data.index(element)
|
497
|
+
end
|
536
498
|
end
|
537
499
|
|
538
500
|
# Keep only unique elements of the vector along with their indexes.
|
539
501
|
def uniq
|
540
502
|
uniq_vector = @data.uniq
|
541
|
-
new_index = uniq_vector.
|
503
|
+
new_index = uniq_vector.each_with_object([]) do |element, acc|
|
542
504
|
acc << index_of(element)
|
543
|
-
acc
|
544
505
|
end
|
545
506
|
|
546
|
-
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
507
|
+
Daru::Vector.new uniq_vector, name: @name, metadata: @metadata.dup, index: new_index, dtype: @dtype
|
547
508
|
end
|
548
509
|
|
549
510
|
def any? &block
|
@@ -570,29 +531,34 @@ module Daru
|
|
570
531
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
571
532
|
# # Say you want to sort these strings by length.
|
572
533
|
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
573
|
-
def sort opts={}
|
534
|
+
def sort opts={}
|
574
535
|
opts = {
|
575
|
-
ascending: true
|
576
|
-
type: :quick_sort
|
536
|
+
ascending: true
|
577
537
|
}.merge(opts)
|
578
538
|
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
0
|
584
|
-
elsif a.nil?
|
585
|
-
-1
|
539
|
+
vector_index = @data.each_with_index
|
540
|
+
vector_index =
|
541
|
+
if block_given?
|
542
|
+
vector_index.sort { |a,b| yield(a[0], b[0]) }
|
586
543
|
else
|
587
|
-
|
544
|
+
vector_index.sort { |(av, ai), (bv, bi)|
|
545
|
+
if !av.nil? && !bv.nil?
|
546
|
+
av <=> bv
|
547
|
+
elsif av.nil? && bv.nil?
|
548
|
+
ai <=> bi
|
549
|
+
elsif av.nil?
|
550
|
+
opts[:ascending] ? -1 : 1
|
551
|
+
else
|
552
|
+
opts[:ascending] ? 1 : -1
|
553
|
+
end
|
554
|
+
}
|
588
555
|
end
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
index = Daru::Index.new index
|
556
|
+
vector_index.reverse! unless opts[:ascending]
|
557
|
+
vector, index = vector_index.transpose
|
558
|
+
old_index = @index.to_a
|
559
|
+
index = index.map { |i| old_index[i] }
|
594
560
|
|
595
|
-
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
561
|
+
Daru::Vector.new(vector, index: index, name: @name, metadata: @metadata.dup, dtype: @dtype)
|
596
562
|
end
|
597
563
|
|
598
564
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
@@ -604,7 +570,7 @@ module Daru
|
|
604
570
|
# Returns *true* if the value passed actually exists or is not marked as
|
605
571
|
# a *missing value*.
|
606
572
|
def exists? value
|
607
|
-
!@missing_values.
|
573
|
+
!@missing_values.key?(self[index_of(value)])
|
608
574
|
end
|
609
575
|
|
610
576
|
# Like map, but returns a Daru::Vector with the returned values.
|
@@ -624,7 +590,7 @@ module Daru
|
|
624
590
|
end
|
625
591
|
|
626
592
|
# Delete an element if block returns true. Destructive.
|
627
|
-
def delete_if
|
593
|
+
def delete_if
|
628
594
|
return to_enum(:delete_if) unless block_given?
|
629
595
|
|
630
596
|
keep_e = []
|
@@ -645,7 +611,7 @@ module Daru
|
|
645
611
|
end
|
646
612
|
|
647
613
|
# Keep an element if block returns true. Destructive.
|
648
|
-
def keep_if
|
614
|
+
def keep_if
|
649
615
|
return to_enum(:keep_if) unless block_given?
|
650
616
|
|
651
617
|
keep_e = []
|
@@ -667,12 +633,10 @@ module Daru
|
|
667
633
|
|
668
634
|
# Reports all values that don't comply with a condition.
|
669
635
|
# Returns a hash with the index of data and the invalid data.
|
670
|
-
def verify
|
636
|
+
def verify
|
671
637
|
h = {}
|
672
638
|
(0...size).each do |i|
|
673
|
-
|
674
|
-
h[i] = @data[i]
|
675
|
-
end
|
639
|
+
h[i] = @data[i] unless yield(@data[i])
|
676
640
|
end
|
677
641
|
|
678
642
|
h
|
@@ -683,7 +647,7 @@ module Daru
|
|
683
647
|
# a.splitted
|
684
648
|
# =>
|
685
649
|
# [["a","b"],["c","d"],["a","b"],["d"]]
|
686
|
-
def splitted sep=
|
650
|
+
def splitted sep=','
|
687
651
|
@data.map do |s|
|
688
652
|
if s.nil?
|
689
653
|
nil
|
@@ -708,14 +672,11 @@ module Daru
|
|
708
672
|
# "c"=>#<Daru::Vector:0x7f2dbcc09b08
|
709
673
|
# @data=[0, 1, 1]>}
|
710
674
|
#
|
711
|
-
def split_by_separator sep=
|
675
|
+
def split_by_separator sep=','
|
712
676
|
split_data = splitted sep
|
713
677
|
factors = split_data.flatten.uniq.compact
|
714
678
|
|
715
|
-
out = factors.
|
716
|
-
h[x] = []
|
717
|
-
h
|
718
|
-
end
|
679
|
+
out = factors.map { |x| [x, []] }.to_h
|
719
680
|
|
720
681
|
split_data.each do |r|
|
721
682
|
if r.nil?
|
@@ -724,22 +685,18 @@ module Daru
|
|
724
685
|
end
|
725
686
|
else
|
726
687
|
factors.each do |f|
|
727
|
-
out[f].push(r.include?(f) ? 1:0)
|
688
|
+
out[f].push(r.include?(f) ? 1 : 0)
|
728
689
|
end
|
729
690
|
end
|
730
691
|
end
|
731
692
|
|
732
|
-
out.
|
733
|
-
s[v[0]] = Daru::Vector.new v[1]
|
734
|
-
s
|
735
|
-
end
|
693
|
+
out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
|
736
694
|
end
|
737
695
|
|
738
|
-
def split_by_separator_freq(sep=
|
739
|
-
split_by_separator(sep).
|
740
|
-
|
741
|
-
|
742
|
-
end
|
696
|
+
def split_by_separator_freq(sep=',')
|
697
|
+
split_by_separator(sep).map do |k, v|
|
698
|
+
[k, v.inject { |s,x| s+x.to_i }]
|
699
|
+
end.to_h
|
743
700
|
end
|
744
701
|
|
745
702
|
def reset_index!
|
@@ -808,25 +765,25 @@ module Daru
|
|
808
765
|
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
809
766
|
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
810
767
|
def lag k=1
|
811
|
-
return
|
768
|
+
return dup if k == 0
|
812
769
|
|
813
770
|
dat = @data.to_a.dup
|
814
771
|
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
815
772
|
(0...k).each { |i| dat[i] = nil }
|
816
773
|
|
817
|
-
Daru::Vector.new(dat, index: @index, name: @name)
|
774
|
+
Daru::Vector.new(dat, index: @index, name: @name, metadata: @metadata.dup)
|
818
775
|
end
|
819
776
|
|
820
777
|
def detach_index
|
821
|
-
Daru::DataFrame.new(
|
778
|
+
Daru::DataFrame.new(
|
822
779
|
index: @index.to_a,
|
823
780
|
values: @data.to_a
|
824
|
-
|
781
|
+
)
|
825
782
|
end
|
826
783
|
|
827
784
|
# Non-destructive version of #replace_nils!
|
828
785
|
def replace_nils replacement
|
829
|
-
|
786
|
+
dup.replace_nils!(replacement)
|
830
787
|
end
|
831
788
|
|
832
789
|
# number of non-missing elements
|
@@ -857,23 +814,13 @@ module Daru
|
|
857
814
|
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
858
815
|
# the stored GSL::Vector object.
|
859
816
|
def to_gsl
|
860
|
-
|
861
|
-
|
862
|
-
return @data.data
|
863
|
-
else
|
864
|
-
GSL::Vector.alloc only_valid(:array).to_a
|
865
|
-
end
|
866
|
-
else
|
867
|
-
raise NoMethodError, "Install gsl-nmatrix for access to this functionality."
|
868
|
-
end
|
817
|
+
raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
|
818
|
+
dtype == :gsl ? @data.data : GSL::Vector.alloc(only_valid(:array).to_a)
|
869
819
|
end
|
870
820
|
|
871
|
-
# Convert to hash. Hash keys are indexes and values are the corresponding elements
|
872
|
-
def
|
873
|
-
@index.
|
874
|
-
hsh[index] = self[index]
|
875
|
-
hsh
|
876
|
-
end
|
821
|
+
# Convert to hash (explicit). Hash keys are indexes and values are the corresponding elements
|
822
|
+
def to_h
|
823
|
+
@index.map { |index| [index, self[index]] }.to_h
|
877
824
|
end
|
878
825
|
|
879
826
|
# Return an array
|
@@ -881,34 +828,33 @@ module Daru
|
|
881
828
|
@data.to_a
|
882
829
|
end
|
883
830
|
|
884
|
-
# Convert the hash from
|
885
|
-
def to_json
|
886
|
-
|
831
|
+
# Convert the hash from to_h to json
|
832
|
+
def to_json(*)
|
833
|
+
to_h.to_json
|
887
834
|
end
|
888
835
|
|
889
836
|
# Convert to html for iruby
|
890
837
|
def to_html threshold=30
|
891
838
|
name = @name || 'nil'
|
892
|
-
html =
|
893
|
-
|
894
|
-
|
895
|
-
"Daru::Vector:#{
|
896
|
-
|
897
|
-
|
839
|
+
html = '<table>' \
|
840
|
+
'<tr>' \
|
841
|
+
'<th colspan="2">' \
|
842
|
+
"Daru::Vector:#{object_id} " + " size: #{size}" \
|
843
|
+
'</th>' \
|
844
|
+
'</tr>'
|
898
845
|
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
899
846
|
@index.each_with_index do |index, num|
|
900
847
|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
901
848
|
|
902
|
-
if num
|
903
|
-
|
849
|
+
next if num <= threshold
|
850
|
+
html += '<tr><td>...</td><td>...</td></tr>'
|
904
851
|
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
end
|
852
|
+
last_index = @index.to_a.last
|
853
|
+
html += '<tr>' \
|
854
|
+
'<td>' + last_index.to_s + '</td>' \
|
855
|
+
'<td>' + self[last_index].to_s + '</td>' \
|
856
|
+
'</tr>'
|
857
|
+
break
|
912
858
|
end
|
913
859
|
html += '</table>'
|
914
860
|
|
@@ -920,34 +866,34 @@ module Daru
|
|
920
866
|
end
|
921
867
|
|
922
868
|
# Create a summary of the Vector using Report Builder.
|
923
|
-
def summary(method
|
869
|
+
def summary(method=:to_text)
|
924
870
|
ReportBuilder.new(no_title: true).add(self).send(method)
|
925
871
|
end
|
926
872
|
|
927
873
|
def report_building b
|
928
|
-
b.section(:
|
874
|
+
b.section(name: name) do |s|
|
929
875
|
s.text "n :#{size}"
|
930
876
|
s.text "n valid:#{n_valid}"
|
931
877
|
if @type == :object
|
932
878
|
s.text "factors: #{factors.to_a.join(',')}"
|
933
879
|
s.text "mode: #{mode}"
|
934
880
|
|
935
|
-
s.table(:
|
936
|
-
frequencies.sort_by
|
881
|
+
s.table(name: 'Distribution') do |t|
|
882
|
+
frequencies.sort_by(&:to_s).each do |k,v|
|
937
883
|
key = @index.include?(k) ? @index[k] : k
|
938
|
-
t.row [key, v
|
884
|
+
t.row [key, v, ('%0.2f%%' % (v.quo(n_valid)*100))]
|
939
885
|
end
|
940
886
|
end
|
941
887
|
end
|
942
888
|
|
943
|
-
s.text "median: #{median
|
889
|
+
s.text "median: #{median}" if @type==:numeric || @type==:numeric
|
944
890
|
if @type==:numeric
|
945
|
-
s.text
|
891
|
+
s.text 'mean: %0.4f' % mean
|
946
892
|
if sd
|
947
|
-
s.text
|
948
|
-
s.text
|
949
|
-
s.text
|
950
|
-
s.text
|
893
|
+
s.text 'std.dev.: %0.4f' % sd
|
894
|
+
s.text 'std.err.: %0.4f' % se
|
895
|
+
s.text 'skew: %0.4f' % skew
|
896
|
+
s.text 'kurtosis: %0.4f' % kurtosis
|
951
897
|
end
|
952
898
|
end
|
953
899
|
end
|
@@ -955,22 +901,26 @@ module Daru
|
|
955
901
|
|
956
902
|
# Overrides original inspect for pretty printing in irb
|
957
903
|
def inspect spacing=20, threshold=15
|
958
|
-
longest =
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
904
|
+
longest =
|
905
|
+
[
|
906
|
+
@name.to_s.size,
|
907
|
+
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
908
|
+
(@data.map(&:to_s).map(&:size).max || 0),
|
909
|
+
3 # 'nil'.size
|
910
|
+
].max
|
911
|
+
|
912
|
+
content = ''
|
964
913
|
longest = spacing if longest > spacing
|
965
914
|
name = @name || 'nil'
|
915
|
+
metadata = @metadata || 'nil'
|
966
916
|
formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"
|
967
|
-
content += "\n
|
917
|
+
content += "\n#<#{self.class}:#{object_id} @name = #{name} @metadata = #{metadata} @size = #{size} >"
|
968
918
|
|
969
|
-
content +=
|
919
|
+
content += formatter % ['', name]
|
970
920
|
@index.each_with_index do |index, num|
|
971
|
-
content +=
|
921
|
+
content += formatter % [index.to_s, (self[*index] || 'nil').to_s]
|
972
922
|
if num > threshold
|
973
|
-
content +=
|
923
|
+
content += formatter % ['...', '...']
|
974
924
|
break
|
975
925
|
end
|
976
926
|
end
|
@@ -982,14 +932,10 @@ module Daru
|
|
982
932
|
# Create a new vector with a different index, and preserve the indexing of
|
983
933
|
# current elements.
|
984
934
|
def reindex new_index
|
985
|
-
vector = Daru::Vector.new([], index: new_index, name: @name)
|
935
|
+
vector = Daru::Vector.new([], index: new_index, name: @name, metadata: @metadata.dup)
|
986
936
|
|
987
937
|
new_index.each do |idx|
|
988
|
-
|
989
|
-
vector[idx] = self[idx]
|
990
|
-
else
|
991
|
-
vector[idx] = nil
|
992
|
-
end
|
938
|
+
vector[idx] = @index.include?(idx) ? self[idx] : nil
|
993
939
|
end
|
994
940
|
|
995
941
|
vector
|
@@ -998,9 +944,9 @@ module Daru
|
|
998
944
|
def index= idx
|
999
945
|
raise ArgumentError,
|
1000
946
|
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
1001
|
-
idx.size !=
|
1002
|
-
raise ArgumentError,
|
1003
|
-
idx.
|
947
|
+
idx.size != size
|
948
|
+
raise ArgumentError, 'Can only assign type Index and its subclasses.' unless
|
949
|
+
idx.is_a?(Daru::Index)
|
1004
950
|
|
1005
951
|
@index = idx
|
1006
952
|
self
|
@@ -1020,7 +966,7 @@ module Daru
|
|
1020
966
|
|
1021
967
|
# Duplicate elements and indexes
|
1022
968
|
def dup
|
1023
|
-
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
969
|
+
Daru::Vector.new @data.dup, name: @name, metadata: @metadata.dup, index: @index.dup
|
1024
970
|
end
|
1025
971
|
|
1026
972
|
# == Bootstrap
|
@@ -1042,7 +988,7 @@ module Daru
|
|
1042
988
|
s ||= size
|
1043
989
|
h_est, es, bss = prepare_bootstrap(estimators)
|
1044
990
|
|
1045
|
-
nr.times do
|
991
|
+
nr.times do
|
1046
992
|
bs = sample_with_replacement(s)
|
1047
993
|
es.each do |estimator|
|
1048
994
|
bss[estimator].push(h_est[estimator].call(bs))
|
@@ -1079,10 +1025,7 @@ module Daru
|
|
1079
1025
|
nb = (size / k).to_i
|
1080
1026
|
h_est, es, ps = prepare_bootstrap(estimators)
|
1081
1027
|
|
1082
|
-
est_n = es.
|
1083
|
-
h[v] = h_est[v].call(self)
|
1084
|
-
h
|
1085
|
-
end
|
1028
|
+
est_n = es.map { |v| [v, h_est[v].call(self)] }.to_h
|
1086
1029
|
|
1087
1030
|
nb.times do |i|
|
1088
1031
|
other = @data.dup
|
@@ -1092,7 +1035,8 @@ module Daru
|
|
1092
1035
|
es.each do |estimator|
|
1093
1036
|
# Add pseudovalue
|
1094
1037
|
ps[estimator].push(
|
1095
|
-
nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other)
|
1038
|
+
nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other)
|
1039
|
+
)
|
1096
1040
|
end
|
1097
1041
|
end
|
1098
1042
|
|
@@ -1114,9 +1058,9 @@ module Daru
|
|
1114
1058
|
# Otherwise, a duplicate will be returned irrespective of
|
1115
1059
|
# presence of missing data.
|
1116
1060
|
def only_valid as_a=:vector, duplicate=true
|
1117
|
-
return
|
1118
|
-
return self if !has_missing_data?
|
1119
|
-
return
|
1061
|
+
return dup if !has_missing_data? && as_a == :vector && duplicate
|
1062
|
+
return self if !has_missing_data? && as_a == :vector && !duplicate
|
1063
|
+
return to_a if !has_missing_data? && as_a != :vector
|
1120
1064
|
|
1121
1065
|
new_index = @index.to_a - missing_positions
|
1122
1066
|
new_vector = new_index.map do |idx|
|
@@ -1125,7 +1069,7 @@ module Daru
|
|
1125
1069
|
|
1126
1070
|
return new_vector if as_a != :vector
|
1127
1071
|
|
1128
|
-
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1072
|
+
Daru::Vector.new new_vector, index: new_index, name: @name, metadata: @metadata.dup, dtype: dtype
|
1129
1073
|
end
|
1130
1074
|
|
1131
1075
|
# Returns a Vector containing only missing data (preserves indexes).
|
@@ -1143,30 +1087,32 @@ module Daru
|
|
1143
1087
|
numeric_indexes = []
|
1144
1088
|
|
1145
1089
|
each_with_index do |v, i|
|
1146
|
-
numeric_indexes << i if
|
1090
|
+
numeric_indexes << i if v.is_a?(Numeric) || @missing_values.key?(v)
|
1147
1091
|
end
|
1148
1092
|
|
1149
1093
|
self[*numeric_indexes]
|
1150
1094
|
end
|
1151
1095
|
|
1152
1096
|
# Returns the database type for the vector, according to its content
|
1153
|
-
def db_type
|
1097
|
+
def db_type
|
1154
1098
|
# first, detect any character not number
|
1155
|
-
if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/}
|
1156
|
-
|
1157
|
-
|
1158
|
-
return
|
1159
|
-
elsif @data.find {|v| v.to_s
|
1160
|
-
return
|
1099
|
+
if @data.find { |v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/ } ||
|
1100
|
+
@data.find { |v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/ }
|
1101
|
+
|
1102
|
+
return 'DATE'
|
1103
|
+
elsif @data.find { |v| v.to_s=~/[^0-9e.-]/ }
|
1104
|
+
return 'VARCHAR (255)'
|
1105
|
+
elsif @data.find { |v| v.to_s=~/\./ }
|
1106
|
+
return 'DOUBLE'
|
1161
1107
|
else
|
1162
|
-
return
|
1108
|
+
return 'INTEGER'
|
1163
1109
|
end
|
1164
1110
|
end
|
1165
1111
|
|
1166
1112
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
1167
1113
|
# all values with nils.
|
1168
1114
|
def clone_structure
|
1169
|
-
Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
|
1115
|
+
Daru::Vector.new(([nil]*@size), name: @name, metadata: @metadata.dup, index: @index.dup)
|
1170
1116
|
end
|
1171
1117
|
|
1172
1118
|
# Save the vector to a file
|
@@ -1178,29 +1124,33 @@ module Daru
|
|
1178
1124
|
Daru::IO.save self, filename
|
1179
1125
|
end
|
1180
1126
|
|
1181
|
-
def _dump(
|
1182
|
-
Marshal.dump(
|
1183
|
-
data:
|
1184
|
-
dtype:
|
1185
|
-
name:
|
1186
|
-
|
1187
|
-
|
1127
|
+
def _dump(*) # :nodoc:
|
1128
|
+
Marshal.dump(
|
1129
|
+
data: @data.to_a,
|
1130
|
+
dtype: @dtype,
|
1131
|
+
name: @name,
|
1132
|
+
metadata: @metadata,
|
1133
|
+
index: @index,
|
1134
|
+
missing_values: @missing_values
|
1135
|
+
)
|
1188
1136
|
end
|
1189
1137
|
|
1190
1138
|
def self._load(data) # :nodoc:
|
1191
1139
|
h = Marshal.load(data)
|
1192
|
-
Daru::Vector.new(h[:data],
|
1193
|
-
|
1140
|
+
Daru::Vector.new(h[:data],
|
1141
|
+
index: h[:index],
|
1142
|
+
name: h[:name], metadata: h[:metadata],
|
1143
|
+
dtype: h[:dtype], missing_values: h[:missing_values])
|
1194
1144
|
end
|
1195
1145
|
|
1196
|
-
def daru_vector
|
1146
|
+
def daru_vector(*)
|
1197
1147
|
self
|
1198
1148
|
end
|
1199
1149
|
|
1200
1150
|
alias :dv :daru_vector
|
1201
1151
|
|
1202
1152
|
def method_missing(name, *args, &block)
|
1203
|
-
if name
|
1153
|
+
if name =~ /(.+)\=/
|
1204
1154
|
self[name] = args[0]
|
1205
1155
|
elsif has_index?(name)
|
1206
1156
|
self[name]
|
@@ -1209,7 +1159,7 @@ module Daru
|
|
1209
1159
|
end
|
1210
1160
|
end
|
1211
1161
|
|
1212
|
-
|
1162
|
+
private
|
1213
1163
|
|
1214
1164
|
# For an array or hash of estimators methods, returns
|
1215
1165
|
# an array with three elements
|
@@ -1218,71 +1168,20 @@ module Daru
|
|
1218
1168
|
# 3.- A Hash with estimators names as keys and empty arrays as values
|
1219
1169
|
def prepare_bootstrap(estimators)
|
1220
1170
|
h_est = estimators
|
1221
|
-
h_est = [h_est] unless h_est.is_a?(Array)
|
1171
|
+
h_est = [h_est] unless h_est.is_a?(Array) || h_est.is_a?(Hash)
|
1222
1172
|
|
1223
1173
|
if h_est.is_a? Array
|
1224
|
-
h_est = h_est.
|
1225
|
-
|
1226
|
-
|
1227
|
-
end
|
1174
|
+
h_est = h_est.map do |est|
|
1175
|
+
[est, ->(v) { Daru::Vector.new(v).send(est) }]
|
1176
|
+
end.to_h
|
1228
1177
|
end
|
1229
|
-
bss = h_est.keys.
|
1178
|
+
bss = h_est.keys.map { |v| [v, []] }.to_h
|
1230
1179
|
|
1231
1180
|
[h_est, h_est.keys, bss]
|
1232
1181
|
end
|
1233
1182
|
|
1234
|
-
def
|
1235
|
-
|
1236
|
-
[vector, index]
|
1237
|
-
end
|
1238
|
-
|
1239
|
-
def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
|
1240
|
-
if left_lower < right_upper
|
1241
|
-
left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
|
1242
|
-
if left_upper - left_lower < right_upper - right_lower
|
1243
|
-
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
1244
|
-
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
1245
|
-
else
|
1246
|
-
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
1247
|
-
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
1248
|
-
end
|
1249
|
-
end
|
1250
|
-
end
|
1251
|
-
|
1252
|
-
def partition vector, index, order, left_lower, right_upper, &block
|
1253
|
-
mindex = (left_lower + right_upper) / 2
|
1254
|
-
mvalue = vector[mindex]
|
1255
|
-
i = left_lower
|
1256
|
-
j = right_upper
|
1257
|
-
opposite_order = order == :ascending ? :descending : :ascending
|
1258
|
-
|
1259
|
-
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
1260
|
-
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
1261
|
-
|
1262
|
-
while i < j - 1
|
1263
|
-
vector[i], vector[j] = vector[j], vector[i]
|
1264
|
-
index[i], index[j] = index[j], index[i]
|
1265
|
-
i += 1
|
1266
|
-
j -= 1
|
1267
|
-
|
1268
|
-
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
1269
|
-
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
1270
|
-
end
|
1271
|
-
|
1272
|
-
if i <= j
|
1273
|
-
if i < j
|
1274
|
-
vector[i], vector[j] = vector[j], vector[i]
|
1275
|
-
index[i], index[j] = index[j], index[i]
|
1276
|
-
end
|
1277
|
-
i += 1
|
1278
|
-
j -= 1
|
1279
|
-
end
|
1280
|
-
|
1281
|
-
[j,i]
|
1282
|
-
end
|
1283
|
-
|
1284
|
-
def keep? a, b, order, &block
|
1285
|
-
eval = block.call(a,b)
|
1183
|
+
def keep? a, b, order
|
1184
|
+
eval = yield(a, b)
|
1286
1185
|
if order == :ascending
|
1287
1186
|
return true if eval == -1
|
1288
1187
|
return false if eval == 1
|
@@ -1290,7 +1189,7 @@ module Daru
|
|
1290
1189
|
return false if eval == -1
|
1291
1190
|
return true if eval == 1
|
1292
1191
|
end
|
1293
|
-
|
1192
|
+
false
|
1294
1193
|
end
|
1295
1194
|
|
1296
1195
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
@@ -1299,28 +1198,18 @@ module Daru
|
|
1299
1198
|
source = @data.to_a if source.nil?
|
1300
1199
|
|
1301
1200
|
new_vector =
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1201
|
+
case dtype
|
1202
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
|
1203
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1204
|
+
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
1205
|
+
when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
|
1206
|
+
else raise "Unknown dtype #{dtype}"
|
1207
|
+
end
|
1309
1208
|
|
1310
1209
|
@dtype = dtype || :array
|
1311
1210
|
new_vector
|
1312
1211
|
end
|
1313
1212
|
|
1314
|
-
def named_index_for index
|
1315
|
-
if @index.include? index
|
1316
|
-
index
|
1317
|
-
elsif @index.key index
|
1318
|
-
@index.key index
|
1319
|
-
else
|
1320
|
-
raise IndexError, "Specified index #{index} does not exist."
|
1321
|
-
end
|
1322
|
-
end
|
1323
|
-
|
1324
1213
|
def index_for index
|
1325
1214
|
if @index.include?(index)
|
1326
1215
|
@index[index]
|
@@ -1333,25 +1222,25 @@ module Daru
|
|
1333
1222
|
@size = @data.size
|
1334
1223
|
end
|
1335
1224
|
|
1336
|
-
def set_name name
|
1225
|
+
def set_name name # rubocop:disable Style/AccessorMethodName
|
1337
1226
|
@name =
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1227
|
+
if name.is_a?(Numeric) then name
|
1228
|
+
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
|
1229
|
+
elsif name then name # anything but Numeric or nil
|
1230
|
+
else
|
1231
|
+
nil
|
1232
|
+
end
|
1344
1233
|
end
|
1345
1234
|
|
1346
1235
|
def set_missing_positions
|
1347
1236
|
@missing_positions = []
|
1348
1237
|
@index.each do |e|
|
1349
|
-
@missing_positions << e if
|
1238
|
+
@missing_positions << e if @missing_values.key?(self[e])
|
1350
1239
|
end
|
1351
1240
|
end
|
1352
1241
|
|
1353
1242
|
def try_create_index potential_index
|
1354
|
-
if potential_index.is_a?(Daru::MultiIndex)
|
1243
|
+
if potential_index.is_a?(Daru::MultiIndex) || potential_index.is_a?(Daru::Index)
|
1355
1244
|
potential_index
|
1356
1245
|
else
|
1357
1246
|
Daru::Index.new(potential_index)
|
@@ -1365,11 +1254,13 @@ module Daru
|
|
1365
1254
|
|
1366
1255
|
# Setup missing_values. The missing_values instance variable is set
|
1367
1256
|
# as a Hash for faster lookup times.
|
1368
|
-
def set_missing_values values_arry
|
1257
|
+
def set_missing_values values_arry # rubocop:disable Style/AccessorMethodName
|
1369
1258
|
@missing_values = {}
|
1370
1259
|
@missing_values[nil] = 0
|
1371
1260
|
if values_arry
|
1372
1261
|
values_arry.each do |e|
|
1262
|
+
# If dtype is :gsl then missing values have to be converted to float
|
1263
|
+
e = e.to_f if dtype == :gsl && e.is_a?(Numeric)
|
1373
1264
|
@missing_values[e] = 0
|
1374
1265
|
end
|
1375
1266
|
end
|