daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/monkeys.rb
CHANGED
@@ -6,35 +6,23 @@ class Array
|
|
6
6
|
# a.recode_repeated
|
7
7
|
# => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
|
8
8
|
def recode_repeated
|
9
|
-
if size
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
acc[v] = 1
|
14
|
-
else
|
15
|
-
acc[v] += 1
|
16
|
-
end
|
17
|
-
acc
|
18
|
-
end.select { |_k, v| v > 1 }.keys
|
19
|
-
|
20
|
-
ns = repeated.inject({}) do |acc, v|
|
21
|
-
acc[v] = 0
|
22
|
-
acc
|
23
|
-
end
|
9
|
+
return self if size == uniq.size
|
10
|
+
|
11
|
+
duplicated = group_by { |n| n }
|
12
|
+
.select { |_, g| g.size > 1 }.map(&:first)
|
24
13
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
14
|
+
counter = duplicated.collect { |n| [n, 0] }.to_h
|
15
|
+
|
16
|
+
collect do |n|
|
17
|
+
if counter.key?(n)
|
18
|
+
counter[n] += 1
|
19
|
+
'%s_%d' % [n, counter[n]]
|
20
|
+
else
|
21
|
+
n
|
32
22
|
end
|
33
|
-
else
|
34
|
-
self
|
35
23
|
end
|
36
24
|
end
|
37
|
-
|
25
|
+
|
38
26
|
def daru_vector name=nil, index=nil, dtype=:array
|
39
27
|
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
40
28
|
end
|
@@ -54,20 +42,20 @@ class Range
|
|
54
42
|
alias_method :dv, :daru_vector
|
55
43
|
|
56
44
|
def to_index
|
57
|
-
Daru::Index.new
|
45
|
+
Daru::Index.new to_a
|
58
46
|
end
|
59
47
|
end
|
60
48
|
|
61
49
|
class Hash
|
62
50
|
def daru_vector index=nil, dtype=:array
|
63
|
-
Daru::Vector.new
|
51
|
+
Daru::Vector.new values[0], name: keys[0], index: index, dtype: dtype
|
64
52
|
end
|
65
53
|
|
66
54
|
alias_method :dv, :daru_vector
|
67
55
|
end
|
68
56
|
|
69
57
|
class NMatrix
|
70
|
-
def daru_vector
|
58
|
+
def daru_vector(name=nil, index=nil, *)
|
71
59
|
Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
|
72
60
|
end
|
73
61
|
|
@@ -75,7 +63,7 @@ class NMatrix
|
|
75
63
|
end
|
76
64
|
|
77
65
|
class MDArray
|
78
|
-
def daru_vector
|
66
|
+
def daru_vector(name=nil, index=nil, *)
|
79
67
|
Daru::Vector.new self, name: name, index: index, dtype: :mdarray
|
80
68
|
end
|
81
69
|
|
@@ -90,7 +78,7 @@ end
|
|
90
78
|
|
91
79
|
class Matrix
|
92
80
|
def elementwise_division other
|
93
|
-
|
81
|
+
map.with_index do |e, index|
|
94
82
|
e / other.to_a.flatten[index]
|
95
83
|
end
|
96
84
|
end
|
@@ -104,4 +92,9 @@ class String
|
|
104
92
|
false
|
105
93
|
end
|
106
94
|
end
|
95
|
+
end
|
96
|
+
|
97
|
+
class Daru::DataFrame
|
98
|
+
# NOTE: This alias will soon be removed. Use to_h in all future work.
|
99
|
+
alias :to_hash :to_h
|
107
100
|
end
|
@@ -1,20 +1,20 @@
|
|
1
1
|
module Daru
|
2
2
|
module Plotting
|
3
3
|
module DataFrame
|
4
|
-
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
4
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options. Yields
|
5
5
|
# the corresponding Nyaplot::Plot object and the Nyaplot::Diagram object
|
6
6
|
# to the block, if it is specified. See the nyaplot docs for info on how to
|
7
7
|
# further use these objects.
|
8
|
-
#
|
9
|
-
# Detailed instructions on use of the plotting API can be found in the
|
8
|
+
#
|
9
|
+
# Detailed instructions on use of the plotting API can be found in the
|
10
10
|
# notebooks whose links you can find in the README.
|
11
|
-
#
|
11
|
+
#
|
12
12
|
# == Options
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
|
15
15
|
# * +:x+ - Vector to be used for X co-ordinates.
|
16
16
|
# * +:y+ - Vector to be used for Y co-ordinates.
|
17
|
-
#
|
17
|
+
#
|
18
18
|
# == Usage
|
19
19
|
# # Simple bar chart
|
20
20
|
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
@@ -28,27 +28,28 @@ module Daru
|
|
28
28
|
types = extract_option :type, options
|
29
29
|
|
30
30
|
diagram =
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
31
|
+
case
|
32
|
+
when !([:scatter, :bar, :line, :histogram] & types).empty?
|
33
|
+
if single_diagram? options
|
34
|
+
add_single_diagram plot, options
|
35
|
+
else
|
36
|
+
add_multiple_diagrams plot, options
|
37
|
+
end
|
38
|
+
when types.include?(:box)
|
39
|
+
numeric = only_numerics(clone: false).dup_only_valid
|
40
|
+
|
41
|
+
plot.add_with_df(
|
42
|
+
numeric.to_nyaplotdf,
|
43
|
+
:box, *numeric.vectors.to_a
|
44
|
+
)
|
37
45
|
end
|
38
|
-
when types.include?(:box)
|
39
|
-
numeric = self.only_numerics(clone: false).dup_only_valid
|
40
|
-
|
41
|
-
plot.add_with_df(
|
42
|
-
numeric.to_nyaplotdf,
|
43
|
-
:box, *numeric.vectors.to_a)
|
44
|
-
end
|
45
46
|
|
46
47
|
yield(plot, diagram) if block_given?
|
47
48
|
|
48
49
|
plot.show
|
49
50
|
end
|
50
51
|
|
51
|
-
|
52
|
+
private
|
52
53
|
|
53
54
|
def single_diagram? options
|
54
55
|
options[:x] and options[:x].is_a?(Symbol)
|
@@ -56,12 +57,12 @@ module Daru
|
|
56
57
|
|
57
58
|
def add_single_diagram plot, options
|
58
59
|
args = [
|
59
|
-
|
60
|
-
options[:type],
|
60
|
+
to_nyaplotdf,
|
61
|
+
options[:type],
|
61
62
|
options[:x]
|
62
63
|
]
|
63
64
|
|
64
|
-
args << options[:y] if
|
65
|
+
args << options[:y] if options[:y]
|
65
66
|
|
66
67
|
plot.add_with_df(*args)
|
67
68
|
end
|
@@ -72,11 +73,10 @@ module Daru
|
|
72
73
|
y_vecs = extract_option :y, options
|
73
74
|
|
74
75
|
diagrams = []
|
75
|
-
nyaplot_df =
|
76
|
+
nyaplot_df = to_nyaplotdf
|
76
77
|
total = x_vecs.size
|
77
78
|
types = types.size < total ? types*total : types
|
78
79
|
|
79
|
-
|
80
80
|
(0...total).each do |i|
|
81
81
|
diagrams << plot.add_with_df(
|
82
82
|
nyaplot_df,
|
@@ -95,11 +95,10 @@ module Daru
|
|
95
95
|
o.is_a?(Array) ? o : [o]
|
96
96
|
else
|
97
97
|
arr = options.keys
|
98
|
-
arr.keep_if { |a| a =~ Regexp.new("\\A#{opt
|
98
|
+
arr.keep_if { |a| a =~ Regexp.new("\\A#{opt}") }.sort
|
99
99
|
arr.map { |a| options[a] }
|
100
100
|
end
|
101
101
|
end
|
102
|
-
|
103
102
|
end
|
104
103
|
end
|
105
|
-
end if Daru.has_nyaplot?
|
104
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
module Daru
|
2
2
|
module Plotting
|
3
3
|
module Vector
|
4
|
-
|
5
4
|
# Plots a Vector with Nyaplot on IRuby using the given options. Yields the
|
6
|
-
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
7
|
-
# to the block, which can be used for setting various options as per the
|
5
|
+
# plot object (Nyaplot::Plot) and the diagram object (Nyaplot::Diagram)
|
6
|
+
# to the block, which can be used for setting various options as per the
|
8
7
|
# Nyaplot API.
|
9
|
-
#
|
8
|
+
#
|
10
9
|
# == Options
|
11
10
|
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
12
|
-
#
|
11
|
+
#
|
13
12
|
# == Usage
|
14
13
|
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
15
14
|
# vector.plot(type: :bar) do |plot|
|
16
15
|
# plot.title "My first plot"
|
17
16
|
# plot.width 1200
|
18
17
|
# end
|
19
|
-
def plot opts={}
|
18
|
+
def plot opts={}
|
20
19
|
options = {
|
21
20
|
type: :scatter
|
22
21
|
}.merge(opts)
|
@@ -24,16 +23,16 @@ module Daru
|
|
24
23
|
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
25
24
|
plot = Nyaplot::Plot.new
|
26
25
|
diagram =
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
26
|
+
if [:box, :histogram].include? options[:type]
|
27
|
+
plot.add(options[:type], @data.to_a)
|
28
|
+
else
|
29
|
+
plot.add(options[:type], x_axis, @data.to_a)
|
30
|
+
end
|
32
31
|
|
33
32
|
yield plot, diagram if block_given?
|
34
|
-
|
33
|
+
|
35
34
|
plot.show
|
36
35
|
end
|
37
36
|
end
|
38
37
|
end
|
39
|
-
end if Daru.has_nyaplot?
|
38
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/vector.rb
CHANGED
@@ -1,11 +1,9 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require '
|
4
|
-
require '
|
5
|
-
require '
|
6
|
-
require 'accessors/
|
7
|
-
require 'accessors/nmatrix_wrapper.rb'
|
8
|
-
require 'accessors/gsl_wrapper.rb'
|
1
|
+
require 'daru/maths/arithmetic/vector.rb'
|
2
|
+
require 'daru/maths/statistics/vector.rb'
|
3
|
+
require 'daru/plotting/vector.rb'
|
4
|
+
require 'daru/accessors/array_wrapper.rb'
|
5
|
+
require 'daru/accessors/nmatrix_wrapper.rb'
|
6
|
+
require 'daru/accessors/gsl_wrapper.rb'
|
9
7
|
|
10
8
|
module Daru
|
11
9
|
class Vector
|
@@ -28,10 +26,10 @@ module Daru
|
|
28
26
|
self
|
29
27
|
end
|
30
28
|
|
31
|
-
def each_with_index
|
29
|
+
def each_with_index
|
32
30
|
return to_enum(:each_with_index) unless block_given?
|
33
31
|
|
34
|
-
@index.each { |i|
|
32
|
+
@index.each { |i| yield(self[i], i) }
|
35
33
|
self
|
36
34
|
end
|
37
35
|
|
@@ -59,6 +57,10 @@ module Daru
|
|
59
57
|
# Store a hash of labels for values. Supplementary only. Recommend using index
|
60
58
|
# for proper usage.
|
61
59
|
attr_accessor :labels
|
60
|
+
# Store vector data in an array
|
61
|
+
attr_reader :data
|
62
|
+
# Attach arbitrary metadata to vector (usu a hash)
|
63
|
+
attr_accessor :metadata
|
62
64
|
|
63
65
|
# Create a Vector object.
|
64
66
|
#
|
@@ -97,11 +99,13 @@ module Daru
|
|
97
99
|
source = source.values
|
98
100
|
else
|
99
101
|
index = opts[:index]
|
100
|
-
source
|
102
|
+
source ||= []
|
101
103
|
end
|
102
|
-
name
|
104
|
+
name = opts[:name]
|
103
105
|
set_name name
|
104
106
|
|
107
|
+
@metadata = opts[:metadata] || {}
|
108
|
+
|
105
109
|
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
106
110
|
@index = try_create_index(index || @data.size)
|
107
111
|
|
@@ -137,11 +141,10 @@ module Daru
|
|
137
141
|
value = opts[:value]
|
138
142
|
opts.delete :value
|
139
143
|
if block
|
140
|
-
|
144
|
+
Daru::Vector.new Array.new(n) { |i| block.call(i) }, opts
|
141
145
|
else
|
142
|
-
|
146
|
+
Daru::Vector.new Array.new(n) { value }, opts
|
143
147
|
end
|
144
|
-
vector
|
145
148
|
end
|
146
149
|
|
147
150
|
# Create a vector using (almost) any object
|
@@ -180,7 +183,7 @@ module Daru
|
|
180
183
|
when Daru::Vector
|
181
184
|
values.concat a.to_a
|
182
185
|
when Range
|
183
|
-
values.concat
|
186
|
+
values.concat a.to_a
|
184
187
|
else
|
185
188
|
values << a
|
186
189
|
end
|
@@ -199,56 +202,18 @@ module Daru
|
|
199
202
|
#
|
200
203
|
# # For vectors employing hierarchial multi index
|
201
204
|
#
|
202
|
-
def [](*
|
203
|
-
|
204
|
-
|
205
|
-
sub_index = @index[indexes]
|
206
|
-
result =
|
207
|
-
if sub_index.is_a?(Integer)
|
208
|
-
@data[sub_index]
|
209
|
-
else
|
210
|
-
elements = sub_index.map do |tuple|
|
211
|
-
@data[@index[tuple]]
|
212
|
-
end
|
205
|
+
def [](*input_indexes)
|
206
|
+
# Get a proper index object
|
207
|
+
indexes = @index[*input_indexes]
|
213
208
|
|
214
|
-
|
215
|
-
|
216
|
-
end
|
217
|
-
Daru::Vector.new(
|
218
|
-
elements, index: sub_index, name: @name, dtype: @dtype)
|
219
|
-
end
|
209
|
+
# If one object is asked return it
|
210
|
+
return @data[indexes] if indexes.is_a? Numeric
|
220
211
|
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
unless indexes[1]
|
227
|
-
case location
|
228
|
-
when Range
|
229
|
-
first = location.first
|
230
|
-
last = location.last
|
231
|
-
indexes = @index.slice first, last
|
232
|
-
else
|
233
|
-
pos = @index[location]
|
234
|
-
if pos.is_a?(Numeric)
|
235
|
-
return @data[pos]
|
236
|
-
else
|
237
|
-
indexes = pos
|
238
|
-
end
|
239
|
-
end
|
240
|
-
else
|
241
|
-
indexes = indexes.map { |e| named_index_for(e) }
|
242
|
-
end
|
243
|
-
|
244
|
-
begin
|
245
|
-
Daru::Vector.new(
|
246
|
-
indexes.map { |loc| @data[@index[loc]] },
|
247
|
-
name: @name, index: indexes, dtype: @dtype)
|
248
|
-
rescue NoMethodError
|
249
|
-
raise IndexError, "Specified index #{pos.inspect} does not exist."
|
250
|
-
end
|
251
|
-
end
|
212
|
+
# Form a new Vector using indexes and return it
|
213
|
+
Daru::Vector.new(
|
214
|
+
indexes.map { |loc| @data[@index[loc]] },
|
215
|
+
name: @name, metadata: @metadata.dup, index: indexes.conform(input_indexes), dtype: @dtype
|
216
|
+
)
|
252
217
|
end
|
253
218
|
|
254
219
|
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
@@ -265,15 +230,14 @@ module Daru
|
|
265
230
|
# # b 2
|
266
231
|
# # c 3
|
267
232
|
def []=(*location, value)
|
268
|
-
cast(dtype: :array) if value.nil?
|
233
|
+
cast(dtype: :array) if value.nil? && dtype != :array
|
269
234
|
|
270
|
-
@possibly_changed_type = true if @type == :object
|
235
|
+
@possibly_changed_type = true if @type == :object && (value.nil? ||
|
271
236
|
value.is_a?(Numeric))
|
272
|
-
@possibly_changed_type = true if @type == :numeric
|
237
|
+
@possibly_changed_type = true if @type == :numeric && (!value.is_a?(Numeric) &&
|
273
238
|
!value.nil?)
|
274
239
|
|
275
|
-
|
276
|
-
pos = @index[location]
|
240
|
+
pos = @index[*location]
|
277
241
|
|
278
242
|
if pos.is_a?(Numeric)
|
279
243
|
@data[pos] = value
|
@@ -315,9 +279,7 @@ module Daru
|
|
315
279
|
# assignment/deletion of elements is done. Updating data this way is called
|
316
280
|
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
317
281
|
def update
|
318
|
-
|
319
|
-
set_missing_positions
|
320
|
-
end
|
282
|
+
Daru.lazy_update and set_missing_positions
|
321
283
|
end
|
322
284
|
|
323
285
|
# Two vectors are equal if the have the exact same index values corresponding
|
@@ -325,10 +287,8 @@ module Daru
|
|
325
287
|
def == other
|
326
288
|
case other
|
327
289
|
when Daru::Vector
|
328
|
-
@index == other.index
|
329
|
-
|
330
|
-
self[index] == other[index]
|
331
|
-
end
|
290
|
+
@index == other.index && @size == other.size &&
|
291
|
+
@index.all? { |index| self[index] == other[index] }
|
332
292
|
else
|
333
293
|
super
|
334
294
|
end
|
@@ -369,12 +329,12 @@ module Daru
|
|
369
329
|
# written above for functionality of each method. Use these methods with the
|
370
330
|
# `where` method to obtain the corresponding Vector/DataFrame.
|
371
331
|
{
|
372
|
-
:
|
373
|
-
:
|
374
|
-
:
|
375
|
-
:
|
376
|
-
:
|
377
|
-
:
|
332
|
+
eq: :==,
|
333
|
+
not_eq: :!=,
|
334
|
+
lt: :<,
|
335
|
+
lteq: :<=,
|
336
|
+
mt: :>,
|
337
|
+
mteq: :>=
|
378
338
|
}.each do |method, operator|
|
379
339
|
define_method(method) do |other|
|
380
340
|
mod = Daru::Core::Query
|
@@ -403,9 +363,8 @@ module Daru
|
|
403
363
|
def in other
|
404
364
|
other = Hash[other.zip(Array.new(other.size, 0))]
|
405
365
|
Daru::Core::Query::BoolArray.new(
|
406
|
-
@data.
|
407
|
-
memo << (other.
|
408
|
-
memo
|
366
|
+
@data.each_with_object([]) do |d, memo|
|
367
|
+
memo << (other.key?(d) ? true : false)
|
409
368
|
end
|
410
369
|
)
|
411
370
|
end
|
@@ -447,7 +406,7 @@ module Daru
|
|
447
406
|
# # 13 5
|
448
407
|
# # 15 1
|
449
408
|
def where bool_arry
|
450
|
-
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry,
|
409
|
+
Daru::Core::Query.vector_where @data.to_a, @index.to_a, bool_arry, dtype
|
451
410
|
end
|
452
411
|
|
453
412
|
def head q=10
|
@@ -458,18 +417,21 @@ module Daru
|
|
458
417
|
self[(@size - q)..(@size-1)]
|
459
418
|
end
|
460
419
|
|
420
|
+
def empty?
|
421
|
+
@index.empty?
|
422
|
+
end
|
423
|
+
|
461
424
|
# Reports whether missing data is present in the Vector.
|
462
425
|
def has_missing_data?
|
463
426
|
!missing_positions.empty?
|
464
427
|
end
|
465
428
|
alias :flawed? :has_missing_data?
|
466
429
|
|
467
|
-
|
468
430
|
# Append an element to the vector by specifying the element and index
|
469
431
|
def concat element, index
|
470
|
-
raise IndexError,
|
432
|
+
raise IndexError, 'Expected new unique index' if @index.include? index
|
471
433
|
|
472
|
-
@index
|
434
|
+
@index |= [index]
|
473
435
|
@data[@index[index]] = element
|
474
436
|
|
475
437
|
set_size
|
@@ -486,14 +448,14 @@ module Daru
|
|
486
448
|
def cast opts={}
|
487
449
|
dt = opts[:dtype]
|
488
450
|
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
489
|
-
dt == :array
|
451
|
+
dt == :array || dt == :nmatrix || dt == :gsl
|
490
452
|
|
491
453
|
@data = cast_vector_to dt unless @dtype == dt
|
492
454
|
end
|
493
455
|
|
494
456
|
# Delete an element by value
|
495
457
|
def delete element
|
496
|
-
|
458
|
+
delete_at index_of(element)
|
497
459
|
end
|
498
460
|
|
499
461
|
# Delete element by index
|
@@ -514,15 +476,12 @@ module Daru
|
|
514
476
|
def type
|
515
477
|
return @data.nm_dtype if dtype == :nmatrix
|
516
478
|
|
517
|
-
if @type.nil?
|
479
|
+
if @type.nil? || @possibly_changed_type
|
518
480
|
@type = :numeric
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
break
|
524
|
-
end
|
525
|
-
end
|
481
|
+
each do |e|
|
482
|
+
next if e.nil? || e.is_a?(Numeric)
|
483
|
+
@type = :object
|
484
|
+
break
|
526
485
|
end
|
527
486
|
@possibly_changed_type = false
|
528
487
|
end
|
@@ -532,18 +491,20 @@ module Daru
|
|
532
491
|
|
533
492
|
# Get index of element
|
534
493
|
def index_of element
|
535
|
-
|
494
|
+
case dtype
|
495
|
+
when :array then @index.key @data.index { |x| x.eql? element }
|
496
|
+
else @index.key @data.index(element)
|
497
|
+
end
|
536
498
|
end
|
537
499
|
|
538
500
|
# Keep only unique elements of the vector alongwith their indexes.
|
539
501
|
def uniq
|
540
502
|
uniq_vector = @data.uniq
|
541
|
-
new_index = uniq_vector.
|
503
|
+
new_index = uniq_vector.each_with_object([]) do |element, acc|
|
542
504
|
acc << index_of(element)
|
543
|
-
acc
|
544
505
|
end
|
545
506
|
|
546
|
-
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
507
|
+
Daru::Vector.new uniq_vector, name: @name, metadata: @metadata.dup, index: new_index, dtype: @dtype
|
547
508
|
end
|
548
509
|
|
549
510
|
def any? &block
|
@@ -570,29 +531,34 @@ module Daru
|
|
570
531
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
571
532
|
# # Say you want to sort these strings by length.
|
572
533
|
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
573
|
-
def sort opts={}
|
534
|
+
def sort opts={}
|
574
535
|
opts = {
|
575
|
-
ascending: true
|
576
|
-
type: :quick_sort
|
536
|
+
ascending: true
|
577
537
|
}.merge(opts)
|
578
538
|
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
0
|
584
|
-
elsif a.nil?
|
585
|
-
-1
|
539
|
+
vector_index = @data.each_with_index
|
540
|
+
vector_index =
|
541
|
+
if block_given?
|
542
|
+
vector_index.sort { |a,b| yield(a[0], b[0]) }
|
586
543
|
else
|
587
|
-
|
544
|
+
vector_index.sort { |(av, ai), (bv, bi)|
|
545
|
+
if !av.nil? && !bv.nil?
|
546
|
+
av <=> bv
|
547
|
+
elsif av.nil? && bv.nil?
|
548
|
+
ai <=> bi
|
549
|
+
elsif av.nil?
|
550
|
+
opts[:ascending] ? -1 : 1
|
551
|
+
else
|
552
|
+
opts[:ascending] ? 1 : -1
|
553
|
+
end
|
554
|
+
}
|
588
555
|
end
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
index = Daru::Index.new index
|
556
|
+
vector_index.reverse! unless opts[:ascending]
|
557
|
+
vector, index = vector_index.transpose
|
558
|
+
old_index = @index.to_a
|
559
|
+
index = index.map { |i| old_index[i] }
|
594
560
|
|
595
|
-
Daru::Vector.new(vector, index: index, name: @name, dtype: @dtype)
|
561
|
+
Daru::Vector.new(vector, index: index, name: @name, metadata: @metadata.dup, dtype: @dtype)
|
596
562
|
end
|
597
563
|
|
598
564
|
# Just sort the data and get an Array in return using Enumerable#sort.
|
@@ -604,7 +570,7 @@ module Daru
|
|
604
570
|
# Returns *true* if the value passed is actually exists or is not marked as
|
605
571
|
# a *missing value*.
|
606
572
|
def exists? value
|
607
|
-
!@missing_values.
|
573
|
+
!@missing_values.key?(self[index_of(value)])
|
608
574
|
end
|
609
575
|
|
610
576
|
# Like map, but returns a Daru::Vector with the returned values.
|
@@ -624,7 +590,7 @@ module Daru
|
|
624
590
|
end
|
625
591
|
|
626
592
|
# Delete an element if block returns true. Destructive.
|
627
|
-
def delete_if
|
593
|
+
def delete_if
|
628
594
|
return to_enum(:delete_if) unless block_given?
|
629
595
|
|
630
596
|
keep_e = []
|
@@ -645,7 +611,7 @@ module Daru
|
|
645
611
|
end
|
646
612
|
|
647
613
|
# Keep an element if block returns true. Destructive.
|
648
|
-
def keep_if
|
614
|
+
def keep_if
|
649
615
|
return to_enum(:keep_if) unless block_given?
|
650
616
|
|
651
617
|
keep_e = []
|
@@ -667,12 +633,10 @@ module Daru
|
|
667
633
|
|
668
634
|
# Reports all values that doesn't comply with a condition.
|
669
635
|
# Returns a hash with the index of data and the invalid data.
|
670
|
-
def verify
|
636
|
+
def verify
|
671
637
|
h = {}
|
672
638
|
(0...size).each do |i|
|
673
|
-
|
674
|
-
h[i] = @data[i]
|
675
|
-
end
|
639
|
+
h[i] = @data[i] unless yield(@data[i])
|
676
640
|
end
|
677
641
|
|
678
642
|
h
|
@@ -683,7 +647,7 @@ module Daru
|
|
683
647
|
# a.splitted
|
684
648
|
# =>
|
685
649
|
# [["a","b"],["c","d"],["a","b"],["d"]]
|
686
|
-
def splitted sep=
|
650
|
+
def splitted sep=','
|
687
651
|
@data.map do |s|
|
688
652
|
if s.nil?
|
689
653
|
nil
|
@@ -708,14 +672,11 @@ module Daru
|
|
708
672
|
# "c"=>#<Daru::Vector:0x7f2dbcc09b08
|
709
673
|
# @data=[0, 1, 1]>}
|
710
674
|
#
|
711
|
-
def split_by_separator sep=
|
675
|
+
def split_by_separator sep=','
|
712
676
|
split_data = splitted sep
|
713
677
|
factors = split_data.flatten.uniq.compact
|
714
678
|
|
715
|
-
out = factors.
|
716
|
-
h[x] = []
|
717
|
-
h
|
718
|
-
end
|
679
|
+
out = factors.map { |x| [x, []] }.to_h
|
719
680
|
|
720
681
|
split_data.each do |r|
|
721
682
|
if r.nil?
|
@@ -724,22 +685,18 @@ module Daru
|
|
724
685
|
end
|
725
686
|
else
|
726
687
|
factors.each do |f|
|
727
|
-
out[f].push(r.include?(f) ? 1:0)
|
688
|
+
out[f].push(r.include?(f) ? 1 : 0)
|
728
689
|
end
|
729
690
|
end
|
730
691
|
end
|
731
692
|
|
732
|
-
out.
|
733
|
-
s[v[0]] = Daru::Vector.new v[1]
|
734
|
-
s
|
735
|
-
end
|
693
|
+
out.map { |k, v| [k, Daru::Vector.new(v)] }.to_h
|
736
694
|
end
|
737
695
|
|
738
|
-
def split_by_separator_freq(sep=
|
739
|
-
split_by_separator(sep).
|
740
|
-
|
741
|
-
|
742
|
-
end
|
696
|
+
def split_by_separator_freq(sep=',')
|
697
|
+
split_by_separator(sep).map do |k, v|
|
698
|
+
[k, v.inject { |s,x| s+x.to_i }]
|
699
|
+
end.to_h
|
743
700
|
end
|
744
701
|
|
745
702
|
def reset_index!
|
@@ -808,25 +765,25 @@ module Daru
|
|
808
765
|
# ts.lag # => [nil, 0.69, 0.23, 0.44, ...]
|
809
766
|
# ts.lag(2) # => [nil, nil, 0.69, 0.23, ...]
|
810
767
|
def lag k=1
|
811
|
-
return
|
768
|
+
return dup if k == 0
|
812
769
|
|
813
770
|
dat = @data.to_a.dup
|
814
771
|
(dat.size - 1).downto(k) { |i| dat[i] = dat[i - k] }
|
815
772
|
(0...k).each { |i| dat[i] = nil }
|
816
773
|
|
817
|
-
Daru::Vector.new(dat, index: @index, name: @name)
|
774
|
+
Daru::Vector.new(dat, index: @index, name: @name, metadata: @metadata.dup)
|
818
775
|
end
|
819
776
|
|
820
777
|
def detach_index
|
821
|
-
Daru::DataFrame.new(
|
778
|
+
Daru::DataFrame.new(
|
822
779
|
index: @index.to_a,
|
823
780
|
values: @data.to_a
|
824
|
-
|
781
|
+
)
|
825
782
|
end
|
826
783
|
|
827
784
|
# Non-destructive version of #replace_nils!
|
828
785
|
def replace_nils replacement
|
829
|
-
|
786
|
+
dup.replace_nils!(replacement)
|
830
787
|
end
|
831
788
|
|
832
789
|
# number of non-missing elements
|
@@ -857,23 +814,13 @@ module Daru
|
|
857
814
|
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
858
815
|
# the stored GSL::Vector object.
|
859
816
|
def to_gsl
|
860
|
-
|
861
|
-
|
862
|
-
return @data.data
|
863
|
-
else
|
864
|
-
GSL::Vector.alloc only_valid(:array).to_a
|
865
|
-
end
|
866
|
-
else
|
867
|
-
raise NoMethodError, "Install gsl-nmatrix for access to this functionality."
|
868
|
-
end
|
817
|
+
raise NoMethodError, 'Install gsl-nmatrix for access to this functionality.' unless Daru.has_gsl?
|
818
|
+
dtype == :gsl ? @data.data : GSL::Vector.alloc(only_valid(:array).to_a)
|
869
819
|
end
|
870
820
|
|
871
|
-
# Convert to hash. Hash keys are indexes and values are the correspoding elements
|
872
|
-
def
|
873
|
-
@index.
|
874
|
-
hsh[index] = self[index]
|
875
|
-
hsh
|
876
|
-
end
|
821
|
+
# Convert to hash (explicit). Hash keys are indexes and values are the correspoding elements
|
822
|
+
def to_h
|
823
|
+
@index.map { |index| [index, self[index]] }.to_h
|
877
824
|
end
|
878
825
|
|
879
826
|
# Return an array
|
@@ -881,34 +828,33 @@ module Daru
|
|
881
828
|
@data.to_a
|
882
829
|
end
|
883
830
|
|
884
|
-
# Convert the hash from
|
885
|
-
def to_json
|
886
|
-
|
831
|
+
# Convert the hash from to_h to json
|
832
|
+
def to_json(*)
|
833
|
+
to_h.to_json
|
887
834
|
end
|
888
835
|
|
889
836
|
# Convert to html for iruby
|
890
837
|
def to_html threshold=30
|
891
838
|
name = @name || 'nil'
|
892
|
-
html =
|
893
|
-
|
894
|
-
|
895
|
-
"Daru::Vector:#{
|
896
|
-
|
897
|
-
|
839
|
+
html = '<table>' \
|
840
|
+
'<tr>' \
|
841
|
+
'<th colspan="2">' \
|
842
|
+
"Daru::Vector:#{object_id} " + " size: #{size}" \
|
843
|
+
'</th>' \
|
844
|
+
'</tr>'
|
898
845
|
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
899
846
|
@index.each_with_index do |index, num|
|
900
847
|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
901
848
|
|
902
|
-
if num
|
903
|
-
|
849
|
+
next if num <= threshold
|
850
|
+
html += '<tr><td>...</td><td>...</td></tr>'
|
904
851
|
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
|
911
|
-
end
|
852
|
+
last_index = @index.to_a.last
|
853
|
+
html += '<tr>' \
|
854
|
+
'<td>' + last_index.to_s + '</td>' \
|
855
|
+
'<td>' + self[last_index].to_s + '</td>' \
|
856
|
+
'</tr>'
|
857
|
+
break
|
912
858
|
end
|
913
859
|
html += '</table>'
|
914
860
|
|
@@ -920,34 +866,34 @@ module Daru
|
|
920
866
|
end
|
921
867
|
|
922
868
|
# Create a summary of the Vector using Report Builder.
|
923
|
-
def summary(method
|
869
|
+
def summary(method=:to_text)
|
924
870
|
ReportBuilder.new(no_title: true).add(self).send(method)
|
925
871
|
end
|
926
872
|
|
927
873
|
def report_building b
|
928
|
-
b.section(:
|
874
|
+
b.section(name: name) do |s|
|
929
875
|
s.text "n :#{size}"
|
930
876
|
s.text "n valid:#{n_valid}"
|
931
877
|
if @type == :object
|
932
878
|
s.text "factors: #{factors.to_a.join(',')}"
|
933
879
|
s.text "mode: #{mode}"
|
934
880
|
|
935
|
-
s.table(:
|
936
|
-
frequencies.sort_by
|
881
|
+
s.table(name: 'Distribution') do |t|
|
882
|
+
frequencies.sort_by(&:to_s).each do |k,v|
|
937
883
|
key = @index.include?(k) ? @index[k] : k
|
938
|
-
t.row [key, v
|
884
|
+
t.row [key, v, ('%0.2f%%' % (v.quo(n_valid)*100))]
|
939
885
|
end
|
940
886
|
end
|
941
887
|
end
|
942
888
|
|
943
|
-
s.text "median: #{median
|
889
|
+
s.text "median: #{median}" if @type==:numeric || @type==:numeric
|
944
890
|
if @type==:numeric
|
945
|
-
s.text
|
891
|
+
s.text 'mean: %0.4f' % mean
|
946
892
|
if sd
|
947
|
-
s.text
|
948
|
-
s.text
|
949
|
-
s.text
|
950
|
-
s.text
|
893
|
+
s.text 'std.dev.: %0.4f' % sd
|
894
|
+
s.text 'std.err.: %0.4f' % se
|
895
|
+
s.text 'skew: %0.4f' % skew
|
896
|
+
s.text 'kurtosis: %0.4f' % kurtosis
|
951
897
|
end
|
952
898
|
end
|
953
899
|
end
|
@@ -955,22 +901,26 @@ module Daru
|
|
955
901
|
|
956
902
|
# Over rides original inspect for pretty printing in irb
|
957
903
|
def inspect spacing=20, threshold=15
|
958
|
-
longest =
|
959
|
-
|
960
|
-
|
961
|
-
|
962
|
-
|
963
|
-
|
904
|
+
longest =
|
905
|
+
[
|
906
|
+
@name.to_s.size,
|
907
|
+
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
908
|
+
(@data.map(&:to_s).map(&:size).max || 0),
|
909
|
+
3 # 'nil'.size
|
910
|
+
].max
|
911
|
+
|
912
|
+
content = ''
|
964
913
|
longest = spacing if longest > spacing
|
965
914
|
name = @name || 'nil'
|
915
|
+
metadata = @metadata || 'nil'
|
966
916
|
formatter = "\n%#{longest}.#{longest}s %#{longest}.#{longest}s"
|
967
|
-
content += "\n
|
917
|
+
content += "\n#<#{self.class}:#{object_id} @name = #{name} @metadata = #{metadata} @size = #{size} >"
|
968
918
|
|
969
|
-
content +=
|
919
|
+
content += formatter % ['', name]
|
970
920
|
@index.each_with_index do |index, num|
|
971
|
-
content +=
|
921
|
+
content += formatter % [index.to_s, (self[*index] || 'nil').to_s]
|
972
922
|
if num > threshold
|
973
|
-
content +=
|
923
|
+
content += formatter % ['...', '...']
|
974
924
|
break
|
975
925
|
end
|
976
926
|
end
|
@@ -982,14 +932,10 @@ module Daru
|
|
982
932
|
# Create a new vector with a different index, and preserve the indexing of
|
983
933
|
# current elements.
|
984
934
|
def reindex new_index
|
985
|
-
vector = Daru::Vector.new([], index: new_index, name: @name)
|
935
|
+
vector = Daru::Vector.new([], index: new_index, name: @name, metadata: @metadata.dup)
|
986
936
|
|
987
937
|
new_index.each do |idx|
|
988
|
-
|
989
|
-
vector[idx] = self[idx]
|
990
|
-
else
|
991
|
-
vector[idx] = nil
|
992
|
-
end
|
938
|
+
vector[idx] = @index.include?(idx) ? self[idx] : nil
|
993
939
|
end
|
994
940
|
|
995
941
|
vector
|
@@ -998,9 +944,9 @@ module Daru
|
|
998
944
|
def index= idx
|
999
945
|
raise ArgumentError,
|
1000
946
|
"Size of supplied index #{index.size} does not match size of DataFrame" if
|
1001
|
-
idx.size !=
|
1002
|
-
raise ArgumentError,
|
1003
|
-
idx.
|
947
|
+
idx.size != size
|
948
|
+
raise ArgumentError, 'Can only assign type Index and its subclasses.' unless
|
949
|
+
idx.is_a?(Daru::Index)
|
1004
950
|
|
1005
951
|
@index = idx
|
1006
952
|
self
|
@@ -1020,7 +966,7 @@ module Daru
|
|
1020
966
|
|
1021
967
|
# Duplicate elements and indexes
|
1022
968
|
def dup
|
1023
|
-
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
969
|
+
Daru::Vector.new @data.dup, name: @name, metadata: @metadata.dup, index: @index.dup
|
1024
970
|
end
|
1025
971
|
|
1026
972
|
# == Bootstrap
|
@@ -1042,7 +988,7 @@ module Daru
|
|
1042
988
|
s ||= size
|
1043
989
|
h_est, es, bss = prepare_bootstrap(estimators)
|
1044
990
|
|
1045
|
-
nr.times do
|
991
|
+
nr.times do
|
1046
992
|
bs = sample_with_replacement(s)
|
1047
993
|
es.each do |estimator|
|
1048
994
|
bss[estimator].push(h_est[estimator].call(bs))
|
@@ -1079,10 +1025,7 @@ module Daru
|
|
1079
1025
|
nb = (size / k).to_i
|
1080
1026
|
h_est, es, ps = prepare_bootstrap(estimators)
|
1081
1027
|
|
1082
|
-
est_n = es.
|
1083
|
-
h[v] = h_est[v].call(self)
|
1084
|
-
h
|
1085
|
-
end
|
1028
|
+
est_n = es.map { |v| [v, h_est[v].call(self)] }.to_h
|
1086
1029
|
|
1087
1030
|
nb.times do |i|
|
1088
1031
|
other = @data.dup
|
@@ -1092,7 +1035,8 @@ module Daru
|
|
1092
1035
|
es.each do |estimator|
|
1093
1036
|
# Add pseudovalue
|
1094
1037
|
ps[estimator].push(
|
1095
|
-
nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other)
|
1038
|
+
nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other)
|
1039
|
+
)
|
1096
1040
|
end
|
1097
1041
|
end
|
1098
1042
|
|
@@ -1114,9 +1058,9 @@ module Daru
|
|
1114
1058
|
# Otherwise, a duplicate will be returned irrespective of
|
1115
1059
|
# presence of missing data.
|
1116
1060
|
def only_valid as_a=:vector, duplicate=true
|
1117
|
-
return
|
1118
|
-
return self if !has_missing_data?
|
1119
|
-
return
|
1061
|
+
return dup if !has_missing_data? && as_a == :vector && duplicate
|
1062
|
+
return self if !has_missing_data? && as_a == :vector && !duplicate
|
1063
|
+
return to_a if !has_missing_data? && as_a != :vector
|
1120
1064
|
|
1121
1065
|
new_index = @index.to_a - missing_positions
|
1122
1066
|
new_vector = new_index.map do |idx|
|
@@ -1125,7 +1069,7 @@ module Daru
|
|
1125
1069
|
|
1126
1070
|
return new_vector if as_a != :vector
|
1127
1071
|
|
1128
|
-
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
1072
|
+
Daru::Vector.new new_vector, index: new_index, name: @name, metadata: @metadata.dup, dtype: dtype
|
1129
1073
|
end
|
1130
1074
|
|
1131
1075
|
# Returns a Vector containing only missing data (preserves indexes).
|
@@ -1143,30 +1087,32 @@ module Daru
|
|
1143
1087
|
numeric_indexes = []
|
1144
1088
|
|
1145
1089
|
each_with_index do |v, i|
|
1146
|
-
numeric_indexes << i if
|
1090
|
+
numeric_indexes << i if v.is_a?(Numeric) || @missing_values.key?(v)
|
1147
1091
|
end
|
1148
1092
|
|
1149
1093
|
self[*numeric_indexes]
|
1150
1094
|
end
|
1151
1095
|
|
1152
1096
|
# Returns the database type for the vector, according to its content
|
1153
|
-
def db_type
|
1097
|
+
def db_type
|
1154
1098
|
# first, detect any character not number
|
1155
|
-
if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/}
|
1156
|
-
|
1157
|
-
|
1158
|
-
return
|
1159
|
-
elsif @data.find {|v| v.to_s
|
1160
|
-
return
|
1099
|
+
if @data.find { |v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/ } ||
|
1100
|
+
@data.find { |v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/ }
|
1101
|
+
|
1102
|
+
return 'DATE'
|
1103
|
+
elsif @data.find { |v| v.to_s=~/[^0-9e.-]/ }
|
1104
|
+
return 'VARCHAR (255)'
|
1105
|
+
elsif @data.find { |v| v.to_s=~/\./ }
|
1106
|
+
return 'DOUBLE'
|
1161
1107
|
else
|
1162
|
-
return
|
1108
|
+
return 'INTEGER'
|
1163
1109
|
end
|
1164
1110
|
end
|
1165
1111
|
|
1166
1112
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
1167
1113
|
# all values with nils.
|
1168
1114
|
def clone_structure
|
1169
|
-
Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
|
1115
|
+
Daru::Vector.new(([nil]*@size), name: @name, metadata: @metadata.dup, index: @index.dup)
|
1170
1116
|
end
|
1171
1117
|
|
1172
1118
|
# Save the vector to a file
|
@@ -1178,29 +1124,33 @@ module Daru
|
|
1178
1124
|
Daru::IO.save self, filename
|
1179
1125
|
end
|
1180
1126
|
|
1181
|
-
def _dump(
|
1182
|
-
Marshal.dump(
|
1183
|
-
data:
|
1184
|
-
dtype:
|
1185
|
-
name:
|
1186
|
-
|
1187
|
-
|
1127
|
+
def _dump(*) # :nodoc:
|
1128
|
+
Marshal.dump(
|
1129
|
+
data: @data.to_a,
|
1130
|
+
dtype: @dtype,
|
1131
|
+
name: @name,
|
1132
|
+
metadata: @metadata,
|
1133
|
+
index: @index,
|
1134
|
+
missing_values: @missing_values
|
1135
|
+
)
|
1188
1136
|
end
|
1189
1137
|
|
1190
1138
|
def self._load(data) # :nodoc:
|
1191
1139
|
h = Marshal.load(data)
|
1192
|
-
Daru::Vector.new(h[:data],
|
1193
|
-
|
1140
|
+
Daru::Vector.new(h[:data],
|
1141
|
+
index: h[:index],
|
1142
|
+
name: h[:name], metadata: h[:metadata],
|
1143
|
+
dtype: h[:dtype], missing_values: h[:missing_values])
|
1194
1144
|
end
|
1195
1145
|
|
1196
|
-
def daru_vector
|
1146
|
+
def daru_vector(*)
|
1197
1147
|
self
|
1198
1148
|
end
|
1199
1149
|
|
1200
1150
|
alias :dv :daru_vector
|
1201
1151
|
|
1202
1152
|
def method_missing(name, *args, &block)
|
1203
|
-
if name
|
1153
|
+
if name =~ /(.+)\=/
|
1204
1154
|
self[name] = args[0]
|
1205
1155
|
elsif has_index?(name)
|
1206
1156
|
self[name]
|
@@ -1209,7 +1159,7 @@ module Daru
|
|
1209
1159
|
end
|
1210
1160
|
end
|
1211
1161
|
|
1212
|
-
|
1162
|
+
private
|
1213
1163
|
|
1214
1164
|
# For an array or hash of estimators methods, returns
|
1215
1165
|
# an array with three elements
|
@@ -1218,71 +1168,20 @@ module Daru
|
|
1218
1168
|
# 3.- A Hash with estimators names as keys and empty arrays as values
|
1219
1169
|
def prepare_bootstrap(estimators)
|
1220
1170
|
h_est = estimators
|
1221
|
-
h_est = [h_est] unless h_est.is_a?(Array)
|
1171
|
+
h_est = [h_est] unless h_est.is_a?(Array) || h_est.is_a?(Hash)
|
1222
1172
|
|
1223
1173
|
if h_est.is_a? Array
|
1224
|
-
h_est = h_est.
|
1225
|
-
|
1226
|
-
|
1227
|
-
end
|
1174
|
+
h_est = h_est.map do |est|
|
1175
|
+
[est, ->(v) { Daru::Vector.new(v).send(est) }]
|
1176
|
+
end.to_h
|
1228
1177
|
end
|
1229
|
-
bss = h_est.keys.
|
1178
|
+
bss = h_est.keys.map { |v| [v, []] }.to_h
|
1230
1179
|
|
1231
1180
|
[h_est, h_est.keys, bss]
|
1232
1181
|
end
|
1233
1182
|
|
1234
|
-
def
|
1235
|
-
|
1236
|
-
[vector, index]
|
1237
|
-
end
|
1238
|
-
|
1239
|
-
def recursive_quick_sort vector, index, order, left_lower, right_upper, &block
|
1240
|
-
if left_lower < right_upper
|
1241
|
-
left_upper, right_lower = partition(vector, index, order, left_lower, right_upper, &block)
|
1242
|
-
if left_upper - left_lower < right_upper - right_lower
|
1243
|
-
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
1244
|
-
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
1245
|
-
else
|
1246
|
-
recursive_quick_sort(vector, index, order, right_lower, right_upper, &block)
|
1247
|
-
recursive_quick_sort(vector, index, order, left_lower, left_upper, &block)
|
1248
|
-
end
|
1249
|
-
end
|
1250
|
-
end
|
1251
|
-
|
1252
|
-
def partition vector, index, order, left_lower, right_upper, &block
|
1253
|
-
mindex = (left_lower + right_upper) / 2
|
1254
|
-
mvalue = vector[mindex]
|
1255
|
-
i = left_lower
|
1256
|
-
j = right_upper
|
1257
|
-
opposite_order = order == :ascending ? :descending : :ascending
|
1258
|
-
|
1259
|
-
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
1260
|
-
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
1261
|
-
|
1262
|
-
while i < j - 1
|
1263
|
-
vector[i], vector[j] = vector[j], vector[i]
|
1264
|
-
index[i], index[j] = index[j], index[i]
|
1265
|
-
i += 1
|
1266
|
-
j -= 1
|
1267
|
-
|
1268
|
-
i += 1 while(keep?(vector[i], mvalue, order, &block))
|
1269
|
-
j -= 1 while(keep?(vector[j], mvalue, opposite_order, &block))
|
1270
|
-
end
|
1271
|
-
|
1272
|
-
if i <= j
|
1273
|
-
if i < j
|
1274
|
-
vector[i], vector[j] = vector[j], vector[i]
|
1275
|
-
index[i], index[j] = index[j], index[i]
|
1276
|
-
end
|
1277
|
-
i += 1
|
1278
|
-
j -= 1
|
1279
|
-
end
|
1280
|
-
|
1281
|
-
[j,i]
|
1282
|
-
end
|
1283
|
-
|
1284
|
-
def keep? a, b, order, &block
|
1285
|
-
eval = block.call(a,b)
|
1183
|
+
def keep? a, b, order
|
1184
|
+
eval = yield(a, b)
|
1286
1185
|
if order == :ascending
|
1287
1186
|
return true if eval == -1
|
1288
1187
|
return false if eval == 1
|
@@ -1290,7 +1189,7 @@ module Daru
|
|
1290
1189
|
return false if eval == -1
|
1291
1190
|
return true if eval == 1
|
1292
1191
|
end
|
1293
|
-
|
1192
|
+
false
|
1294
1193
|
end
|
1295
1194
|
|
1296
1195
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
@@ -1299,28 +1198,18 @@ module Daru
|
|
1299
1198
|
source = @data.to_a if source.nil?
|
1300
1199
|
|
1301
1200
|
new_vector =
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1201
|
+
case dtype
|
1202
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
|
1203
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1204
|
+
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
1205
|
+
when :mdarray then raise NotImplementedError, 'MDArray not yet supported.'
|
1206
|
+
else raise "Unknown dtype #{dtype}"
|
1207
|
+
end
|
1309
1208
|
|
1310
1209
|
@dtype = dtype || :array
|
1311
1210
|
new_vector
|
1312
1211
|
end
|
1313
1212
|
|
1314
|
-
def named_index_for index
|
1315
|
-
if @index.include? index
|
1316
|
-
index
|
1317
|
-
elsif @index.key index
|
1318
|
-
@index.key index
|
1319
|
-
else
|
1320
|
-
raise IndexError, "Specified index #{index} does not exist."
|
1321
|
-
end
|
1322
|
-
end
|
1323
|
-
|
1324
1213
|
def index_for index
|
1325
1214
|
if @index.include?(index)
|
1326
1215
|
@index[index]
|
@@ -1333,25 +1222,25 @@ module Daru
|
|
1333
1222
|
@size = @data.size
|
1334
1223
|
end
|
1335
1224
|
|
1336
|
-
def set_name name
|
1225
|
+
def set_name name # rubocop:disable Style/AccessorMethodName
|
1337
1226
|
@name =
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1227
|
+
if name.is_a?(Numeric) then name
|
1228
|
+
elsif name.is_a?(Array) then name.join # in case of MultiIndex tuple
|
1229
|
+
elsif name then name # anything but Numeric or nil
|
1230
|
+
else
|
1231
|
+
nil
|
1232
|
+
end
|
1344
1233
|
end
|
1345
1234
|
|
1346
1235
|
def set_missing_positions
|
1347
1236
|
@missing_positions = []
|
1348
1237
|
@index.each do |e|
|
1349
|
-
@missing_positions << e if
|
1238
|
+
@missing_positions << e if @missing_values.key?(self[e])
|
1350
1239
|
end
|
1351
1240
|
end
|
1352
1241
|
|
1353
1242
|
def try_create_index potential_index
|
1354
|
-
if potential_index.is_a?(Daru::MultiIndex)
|
1243
|
+
if potential_index.is_a?(Daru::MultiIndex) || potential_index.is_a?(Daru::Index)
|
1355
1244
|
potential_index
|
1356
1245
|
else
|
1357
1246
|
Daru::Index.new(potential_index)
|
@@ -1365,11 +1254,13 @@ module Daru
|
|
1365
1254
|
|
1366
1255
|
# Setup missing_values. The missing_values instance variable is set
|
1367
1256
|
# as a Hash for faster lookup times.
|
1368
|
-
def set_missing_values values_arry
|
1257
|
+
def set_missing_values values_arry # rubocop:disable Style/AccessorMethodName
|
1369
1258
|
@missing_values = {}
|
1370
1259
|
@missing_values[nil] = 0
|
1371
1260
|
if values_arry
|
1372
1261
|
values_arry.each do |e|
|
1262
|
+
# If dtype is :gsl then missing values have to be converted to float
|
1263
|
+
e = e.to_f if dtype == :gsl && e.is_a?(Numeric)
|
1373
1264
|
@missing_values[e] = 0
|
1374
1265
|
end
|
1375
1266
|
end
|