daru 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.build.sh +14 -0
- data/.travis.yml +26 -4
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +1 -2
- data/{History.txt → History.md} +110 -44
- data/README.md +21 -288
- data/Rakefile +1 -0
- data/daru.gemspec +12 -8
- data/lib/daru.rb +36 -1
- data/lib/daru/accessors/array_wrapper.rb +8 -3
- data/lib/daru/accessors/gsl_wrapper.rb +113 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
- data/lib/daru/core/group_by.rb +0 -1
- data/lib/daru/dataframe.rb +1192 -83
- data/lib/daru/extensions/rserve.rb +21 -0
- data/lib/daru/index.rb +14 -0
- data/lib/daru/io/io.rb +170 -8
- data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
- data/lib/daru/maths/arithmetic/vector.rb +4 -4
- data/lib/daru/maths/statistics/dataframe.rb +48 -27
- data/lib/daru/maths/statistics/vector.rb +215 -33
- data/lib/daru/monkeys.rb +53 -7
- data/lib/daru/multi_index.rb +21 -4
- data/lib/daru/plotting/dataframe.rb +83 -25
- data/lib/daru/plotting/vector.rb +9 -10
- data/lib/daru/vector.rb +596 -61
- data/lib/daru/version.rb +3 -0
- data/spec/accessors/wrappers_spec.rb +51 -0
- data/spec/core/group_by_spec.rb +0 -2
- data/spec/daru_spec.rb +58 -0
- data/spec/dataframe_spec.rb +768 -73
- data/spec/extensions/rserve_spec.rb +52 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/io/io_spec.rb +161 -24
- data/spec/math/arithmetic/dataframe_spec.rb +26 -7
- data/spec/math/arithmetic/vector_spec.rb +8 -0
- data/spec/math/statistics/dataframe_spec.rb +16 -1
- data/spec/math/statistics/vector_spec.rb +215 -47
- data/spec/spec_helper.rb +21 -2
- data/spec/vector_spec.rb +368 -12
- metadata +99 -16
- data/lib/version.rb +0 -3
- data/notebooks/grouping_splitting_pivots.ipynb +0 -529
- data/notebooks/intro_with_music_data_.ipynb +0 -303
data/lib/daru/monkeys.rb
CHANGED
@@ -1,5 +1,41 @@
|
|
1
1
|
class Array
|
2
|
-
|
2
|
+
# Recode repeated values on an array, adding the number of repetition
|
3
|
+
# at the end
|
4
|
+
# Example:
|
5
|
+
# a=%w{a b c c d d d e}
|
6
|
+
# a.recode_repeated
|
7
|
+
# => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
|
8
|
+
def recode_repeated
|
9
|
+
if size != uniq.size
|
10
|
+
# Find repeated
|
11
|
+
repeated = inject({}) do |acc, v|
|
12
|
+
if acc[v].nil?
|
13
|
+
acc[v] = 1
|
14
|
+
else
|
15
|
+
acc[v] += 1
|
16
|
+
end
|
17
|
+
acc
|
18
|
+
end.select { |_k, v| v > 1 }.keys
|
19
|
+
|
20
|
+
ns = repeated.inject({}) do |acc, v|
|
21
|
+
acc[v] = 0
|
22
|
+
acc
|
23
|
+
end
|
24
|
+
|
25
|
+
collect do |f|
|
26
|
+
if repeated.include? f
|
27
|
+
ns[f] += 1
|
28
|
+
sprintf('%s_%d', f, ns[f])
|
29
|
+
else
|
30
|
+
f
|
31
|
+
end
|
32
|
+
end
|
33
|
+
else
|
34
|
+
self
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def daru_vector name=nil, index=nil, dtype=:array
|
3
39
|
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
4
40
|
end
|
5
41
|
|
@@ -11,8 +47,8 @@ class Array
|
|
11
47
|
end
|
12
48
|
|
13
49
|
class Range
|
14
|
-
def daru_vector name=nil, index=nil, dtype
|
15
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
50
|
+
def daru_vector name=nil, index=nil, dtype=:array
|
51
|
+
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
16
52
|
end
|
17
53
|
|
18
54
|
alias_method :dv, :daru_vector
|
@@ -23,8 +59,8 @@ class Range
|
|
23
59
|
end
|
24
60
|
|
25
61
|
class Hash
|
26
|
-
def daru_vector index=nil, dtype
|
27
|
-
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype:
|
62
|
+
def daru_vector index=nil, dtype=:array
|
63
|
+
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype: dtype
|
28
64
|
end
|
29
65
|
|
30
66
|
alias_method :dv, :daru_vector
|
@@ -32,7 +68,7 @@ end
|
|
32
68
|
|
33
69
|
class NMatrix
|
34
70
|
def daru_vector name=nil, index=nil, dtype=NMatrix
|
35
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
71
|
+
Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
|
36
72
|
end
|
37
73
|
|
38
74
|
alias_method :dv, :daru_vector
|
@@ -40,7 +76,7 @@ end
|
|
40
76
|
|
41
77
|
class MDArray
|
42
78
|
def daru_vector name=nil, index=nil, dtype=MDArray
|
43
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
79
|
+
Daru::Vector.new self, name: name, index: index, dtype: :mdarray
|
44
80
|
end
|
45
81
|
|
46
82
|
alias_method :dv, :daru_vector
|
@@ -58,4 +94,14 @@ class Matrix
|
|
58
94
|
e / other.to_a.flatten[index]
|
59
95
|
end
|
60
96
|
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class String
|
100
|
+
def is_number?
|
101
|
+
if self =~ /^-?\d+[,.]?\d*(e-?\d+)?$/
|
102
|
+
true
|
103
|
+
else
|
104
|
+
false
|
105
|
+
end
|
106
|
+
end
|
61
107
|
end
|
data/lib/daru/multi_index.rb
CHANGED
@@ -17,12 +17,12 @@ module Daru
|
|
17
17
|
attr_reader :values
|
18
18
|
|
19
19
|
# Initialize a MultiIndex by passing a tuple of indexes. The order assigned
|
20
|
-
#
|
21
|
-
#
|
20
|
+
# to the multi index corresponds to the position of the tuple in the array
|
21
|
+
# of tuples.
|
22
22
|
#
|
23
23
|
# Although you can create your own hierarchically indexed Vectors and DataFrames,
|
24
|
-
#
|
25
|
-
#
|
24
|
+
# this class currently contains minimal error checking and is mainly used
|
25
|
+
# internally for summarizing, splitting and grouping of data.
|
26
26
|
#
|
27
27
|
# == Arguments
|
28
28
|
#
|
@@ -68,6 +68,19 @@ module Daru
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
+
def + other
|
72
|
+
other.flatten!
|
73
|
+
tuples = to_a
|
74
|
+
raise ArgumentError, "Incomplete tuple #{other}" unless
|
75
|
+
tuples.all? { |t| t.size == other.size }
|
76
|
+
|
77
|
+
Daru::MultiIndex.new(tuples << (other))
|
78
|
+
end
|
79
|
+
|
80
|
+
def empty?
|
81
|
+
@relation_hash.empty?
|
82
|
+
end
|
83
|
+
|
71
84
|
# Compare two MultiIndex objects for equality based on the contents of their
|
72
85
|
# relation hashes. Does not take object_id into account.
|
73
86
|
def == other
|
@@ -108,6 +121,10 @@ module Daru
|
|
108
121
|
tuple.empty? ? nil : tuple
|
109
122
|
end
|
110
123
|
|
124
|
+
def size
|
125
|
+
to_a.size
|
126
|
+
end
|
127
|
+
|
111
128
|
private
|
112
129
|
|
113
130
|
# Deep compare two hashes
|
@@ -1,9 +1,3 @@
|
|
1
|
-
begin
|
2
|
-
require 'nyaplot'
|
3
|
-
rescue LoadError => e
|
4
|
-
puts "#{e}"
|
5
|
-
end
|
6
|
-
|
7
1
|
module Daru
|
8
2
|
module Plotting
|
9
3
|
module DataFrame
|
@@ -12,36 +6,100 @@ module Daru
|
|
12
6
|
# to the block, if it is specified. See the nyaplot docs for info on how to
|
13
7
|
# further use these objects.
|
14
8
|
#
|
9
|
+
# Detailed instructions on use of the plotting API can be found in the
|
10
|
+
# notebooks whose links you can find in the README.
|
11
|
+
#
|
15
12
|
# == Options
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
# will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
|
21
|
-
# keep :a as the X axis and plot the frequency with which :a appears
|
22
|
-
# on the Y axis.
|
23
|
-
# +:frame+ - Pass this as *true* to disable plotting the graph directly
|
24
|
-
# and instead manually create Nyaplot::Frame object inside the block using
|
25
|
-
# the Nyaplot::Plot object for plotting one or many graphs in a frame.
|
13
|
+
#
|
14
|
+
# * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
|
15
|
+
# * +:x+ - Vector to be used for X co-ordinates.
|
16
|
+
# * +:y+ - Vector to be used for Y co-ordinates.
|
26
17
|
#
|
27
18
|
# == Usage
|
28
|
-
#
|
29
|
-
# df.
|
19
|
+
# # Simple bar chart
|
20
|
+
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
21
|
+
# df.plot type: :bar, x: :a, y: :b
|
30
22
|
def plot opts={}
|
31
23
|
options = {
|
32
|
-
type: :scatter
|
33
|
-
frame: false,
|
34
|
-
legends: []
|
24
|
+
type: :scatter
|
35
25
|
}.merge(opts)
|
36
26
|
|
37
27
|
plot = Nyaplot::Plot.new
|
38
|
-
|
39
|
-
|
28
|
+
types = extract_option :type, options
|
29
|
+
|
30
|
+
diagram =
|
31
|
+
case
|
32
|
+
when !([:scatter, :bar, :line, :histogram] & types).empty?
|
33
|
+
if single_diagram? options
|
34
|
+
add_single_diagram plot, options
|
35
|
+
else
|
36
|
+
add_multiple_diagrams plot, options
|
37
|
+
end
|
38
|
+
when types.include?(:box)
|
39
|
+
numeric = self.only_numerics(clone: false).dup_only_valid
|
40
|
+
|
41
|
+
plot.add_with_df(
|
42
|
+
numeric.to_nyaplotdf,
|
43
|
+
:box, *numeric.vectors.to_a)
|
44
|
+
end
|
40
45
|
|
41
46
|
yield(plot, diagram) if block_given?
|
42
47
|
|
43
|
-
plot.show
|
48
|
+
plot.show
|
44
49
|
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def single_diagram? options
|
54
|
+
options[:x] and options[:x].is_a?(Symbol)
|
55
|
+
end
|
56
|
+
|
57
|
+
def add_single_diagram plot, options
|
58
|
+
args = [
|
59
|
+
self.to_nyaplotdf,
|
60
|
+
options[:type],
|
61
|
+
options[:x]
|
62
|
+
]
|
63
|
+
|
64
|
+
args << options[:y] if(options[:y])
|
65
|
+
|
66
|
+
plot.add_with_df(*args)
|
67
|
+
end
|
68
|
+
|
69
|
+
def add_multiple_diagrams plot, options
|
70
|
+
types = extract_option :type, options
|
71
|
+
x_vecs = extract_option :x, options
|
72
|
+
y_vecs = extract_option :y, options
|
73
|
+
|
74
|
+
diagrams = []
|
75
|
+
nyaplot_df = self.to_nyaplotdf
|
76
|
+
total = x_vecs.size
|
77
|
+
types = types.size < total ? types*total : types
|
78
|
+
|
79
|
+
|
80
|
+
(0...total).each do |i|
|
81
|
+
diagrams << plot.add_with_df(
|
82
|
+
nyaplot_df,
|
83
|
+
types[i],
|
84
|
+
x_vecs[i],
|
85
|
+
y_vecs[i]
|
86
|
+
)
|
87
|
+
end
|
88
|
+
|
89
|
+
diagrams
|
90
|
+
end
|
91
|
+
|
92
|
+
def extract_option opt, options
|
93
|
+
if options[opt]
|
94
|
+
o = options[opt]
|
95
|
+
o.is_a?(Array) ? o : [o]
|
96
|
+
else
|
97
|
+
arr = options.keys
|
98
|
+
arr.keep_if { |a| a =~ Regexp.new("\\A#{opt.to_s}") }.sort
|
99
|
+
arr.map { |a| options[a] }
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
45
103
|
end
|
46
104
|
end
|
47
|
-
end
|
105
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -1,9 +1,3 @@
|
|
1
|
-
begin
|
2
|
-
require 'nyaplot'
|
3
|
-
rescue LoadError => e
|
4
|
-
puts "#{e}"
|
5
|
-
end
|
6
|
-
|
7
1
|
module Daru
|
8
2
|
module Plotting
|
9
3
|
module Vector
|
@@ -27,9 +21,14 @@ module Daru
|
|
27
21
|
type: :scatter
|
28
22
|
}.merge(opts)
|
29
23
|
|
30
|
-
x_axis
|
31
|
-
plot
|
32
|
-
diagram =
|
24
|
+
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
25
|
+
plot = Nyaplot::Plot.new
|
26
|
+
diagram =
|
27
|
+
if [:box, :histogram].include? options[:type]
|
28
|
+
plot.add(options[:type], @data.to_a)
|
29
|
+
else
|
30
|
+
plot.add(options[:type], x_axis, @data.to_a)
|
31
|
+
end
|
33
32
|
|
34
33
|
yield plot, diagram if block_given?
|
35
34
|
|
@@ -37,4 +36,4 @@ module Daru
|
|
37
36
|
end
|
38
37
|
end
|
39
38
|
end
|
40
|
-
end
|
39
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/vector.rb
CHANGED
@@ -5,13 +5,14 @@ require 'maths/statistics/vector.rb'
|
|
5
5
|
require 'plotting/vector.rb'
|
6
6
|
require 'accessors/array_wrapper.rb'
|
7
7
|
require 'accessors/nmatrix_wrapper.rb'
|
8
|
+
require 'accessors/gsl_wrapper.rb'
|
8
9
|
|
9
10
|
module Daru
|
10
11
|
class Vector
|
11
12
|
include Enumerable
|
12
13
|
include Daru::Maths::Arithmetic::Vector
|
13
14
|
include Daru::Maths::Statistics::Vector
|
14
|
-
include Daru::Plotting::Vector
|
15
|
+
include Daru::Plotting::Vector if Daru.has_nyaplot?
|
15
16
|
|
16
17
|
def each(&block)
|
17
18
|
return to_enum(:each) unless block_given?
|
@@ -20,37 +21,55 @@ module Daru
|
|
20
21
|
self
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
return to_enum(:
|
24
|
+
def each_index(&block)
|
25
|
+
return to_enum(:each_index) unless block_given?
|
25
26
|
|
26
|
-
@
|
27
|
+
@index.each(&block)
|
27
28
|
self
|
28
29
|
end
|
29
30
|
|
30
|
-
def
|
31
|
-
return to_enum(:
|
31
|
+
def each_with_index(&block)
|
32
|
+
return to_enum(:each_with_index) unless block_given?
|
32
33
|
|
33
|
-
|
34
|
+
@index.each { |i| yield(self[i], i) }
|
35
|
+
self
|
34
36
|
end
|
35
37
|
|
36
|
-
|
38
|
+
def map!(&block)
|
39
|
+
return to_enum(:map!) unless block_given?
|
40
|
+
@data.map!(&block)
|
41
|
+
update
|
42
|
+
self
|
43
|
+
end
|
37
44
|
|
45
|
+
# The name of the Daru::Vector. String.
|
38
46
|
attr_reader :name
|
47
|
+
# The row index. Can be either Daru::Index or Daru::MultiIndex.
|
39
48
|
attr_reader :index
|
49
|
+
# The total number of elements of the vector.
|
40
50
|
attr_reader :size
|
51
|
+
# The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
|
41
52
|
attr_reader :dtype
|
53
|
+
# If the dtype is :nmatrix, this attribute represents the data type of the
|
54
|
+
# underlying NMatrix object. See NMatrix docs for more details on NMatrix
|
55
|
+
# data types.
|
42
56
|
attr_reader :nm_dtype
|
43
|
-
|
57
|
+
# An Array of the positions in the vector that are being treated as 'missing'.
|
58
|
+
attr_reader :missing_positions
|
59
|
+
# Store a hash of labels for values. Supplementary only. Recommend using index
|
60
|
+
# for proper usage.
|
61
|
+
attr_accessor :labels
|
44
62
|
|
45
63
|
# Create a Vector object.
|
64
|
+
#
|
46
65
|
# == Arguments
|
47
66
|
#
|
48
|
-
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
67
|
+
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
68
|
+
# Hash. If Array, a numeric index will be created if not supplied in the
|
69
|
+
# options. Specifying more index elements than actual values in *source*
|
70
|
+
# will insert *nil* into the surplus index elements. When a Hash is specified,
|
71
|
+
# the keys of the Hash are taken as the index elements and the corresponding
|
72
|
+
# values as the values that populate the vector.
|
54
73
|
#
|
55
74
|
# == Options
|
56
75
|
#
|
@@ -58,10 +77,14 @@ module Daru
|
|
58
77
|
#
|
59
78
|
# * +:index+ - Index of the vector
|
60
79
|
#
|
61
|
-
# * +:dtype+ - The underlying data type. Can be :array
|
80
|
+
# * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
|
81
|
+
# Default :array.
|
62
82
|
#
|
63
83
|
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
|
64
|
-
#
|
84
|
+
# further information on supported data type.
|
85
|
+
#
|
86
|
+
# * +:missing_values+ - An Array of the values that are to be treated as 'missing'.
|
87
|
+
# nil is the default missing value.
|
65
88
|
#
|
66
89
|
# == Usage
|
67
90
|
#
|
@@ -79,7 +102,7 @@ module Daru
|
|
79
102
|
name = opts[:name]
|
80
103
|
set_name name
|
81
104
|
|
82
|
-
@data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
|
105
|
+
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
83
106
|
@index = create_index(index || @data.size)
|
84
107
|
|
85
108
|
if @index.size > @data.size
|
@@ -90,10 +113,81 @@ module Daru
|
|
90
113
|
end
|
91
114
|
|
92
115
|
@possibly_changed_type = true
|
93
|
-
|
116
|
+
set_missing_values opts[:missing_values]
|
117
|
+
set_missing_positions
|
94
118
|
set_size
|
95
119
|
end
|
96
120
|
|
121
|
+
# Create a new vector by specifying the size and an optional value
|
122
|
+
# and block to generate values.
|
123
|
+
#
|
124
|
+
# == Description
|
125
|
+
#
|
126
|
+
# The *new_with_size* class method lets you create a Daru::Vector
|
127
|
+
# by specifying the size as the argument. The optional block, if
|
128
|
+
# supplied, is run once for populating each element in the Vector.
|
129
|
+
#
|
130
|
+
# The result of each run of the block is the value that is ultimately
|
131
|
+
# assigned to that position in the Vector.
|
132
|
+
#
|
133
|
+
# == Options
|
134
|
+
# :value
|
135
|
+
# All the rest like .new
|
136
|
+
def self.new_with_size n, opts={}, &block
|
137
|
+
value = opts[:value]
|
138
|
+
opts.delete :value
|
139
|
+
if block
|
140
|
+
vector = Daru::Vector.new n.times.map { |i| block.call(i) }, opts
|
141
|
+
else
|
142
|
+
vector = Daru::Vector.new n.times.map { value }, opts
|
143
|
+
end
|
144
|
+
vector
|
145
|
+
end
|
146
|
+
|
147
|
+
# Create a vector using (almost) any object
|
148
|
+
# * Array: flattened
|
149
|
+
# * Range: transformed using to_a
|
150
|
+
# * Daru::Vector
|
151
|
+
# * Numeric and string values
|
152
|
+
#
|
153
|
+
# == Description
|
154
|
+
#
|
155
|
+
# The `Vector.[]` class method creates a vector from almost any
|
156
|
+
# object that has a `#to_a` method defined on it. It is similar
|
157
|
+
# to R's `c` method.
|
158
|
+
#
|
159
|
+
# == Usage
|
160
|
+
#
|
161
|
+
# a = Daru::Vector[1,2,3,4,6..10]
|
162
|
+
# #=>
|
163
|
+
# # <Daru::Vector:99448510 @name = nil @size = 9 >
|
164
|
+
# # nil
|
165
|
+
# # 0 1
|
166
|
+
# # 1 2
|
167
|
+
# # 2 3
|
168
|
+
# # 3 4
|
169
|
+
# # 4 6
|
170
|
+
# # 5 7
|
171
|
+
# # 6 8
|
172
|
+
# # 7 9
|
173
|
+
# # 8 10
|
174
|
+
def self.[](*args)
|
175
|
+
values = []
|
176
|
+
args.each do |a|
|
177
|
+
case a
|
178
|
+
when Array
|
179
|
+
values.concat a.flatten
|
180
|
+
when Daru::Vector
|
181
|
+
values.concat a.to_a
|
182
|
+
when Range
|
183
|
+
values.concat a.to_a
|
184
|
+
else
|
185
|
+
values << a
|
186
|
+
end
|
187
|
+
end
|
188
|
+
Daru::Vector.new(values)
|
189
|
+
end
|
190
|
+
|
97
191
|
# Get one or more elements with specified index or a range.
|
98
192
|
#
|
99
193
|
# == Usage
|
@@ -106,6 +200,7 @@ module Daru
|
|
106
200
|
# # For vectors employing hierarchical multi index
|
107
201
|
#
|
108
202
|
def [](*indexes)
|
203
|
+
indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
109
204
|
location = indexes[0]
|
110
205
|
if @index.is_a?(MultiIndex)
|
111
206
|
result =
|
@@ -158,6 +253,19 @@ module Daru
|
|
158
253
|
end
|
159
254
|
end
|
160
255
|
|
256
|
+
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
257
|
+
# and assign an element at that place in the Daru::Vector.
|
258
|
+
#
|
259
|
+
# == Usage
|
260
|
+
#
|
261
|
+
# v = Daru::Vector.new([1,2,3], index: [:a, :b, :c])
|
262
|
+
# v[:a] = 999
|
263
|
+
# #=>
|
264
|
+
# ##<Daru::Vector:90257920 @name = nil @size = 3 >
|
265
|
+
# # nil
|
266
|
+
# # a 999
|
267
|
+
# # b 2
|
268
|
+
# # c 3
|
161
269
|
def []=(*location, value)
|
162
270
|
cast(dtype: :array) if value.nil? and dtype != :array
|
163
271
|
|
@@ -182,7 +290,38 @@ module Daru
|
|
182
290
|
end
|
183
291
|
|
184
292
|
set_size
|
185
|
-
|
293
|
+
set_missing_positions unless Daru.lazy_update
|
294
|
+
end
|
295
|
+
|
296
|
+
# The values to be treated as 'missing'. *nil* is the default missing
|
297
|
+
# type. To set missing values see the missing_values= method.
|
298
|
+
def missing_values
|
299
|
+
@missing_values.keys
|
300
|
+
end
|
301
|
+
|
302
|
+
# Assign an Array to treat certain values as 'missing'.
|
303
|
+
#
|
304
|
+
# == Usage
|
305
|
+
#
|
306
|
+
# v = Daru::Vector.new [1,2,3,4,5]
|
307
|
+
# v.missing_values = [3]
|
308
|
+
# v.update
|
309
|
+
# v.missing_positions
|
310
|
+
# #=> [2]
|
311
|
+
def missing_values= values
|
312
|
+
set_missing_values values
|
313
|
+
set_missing_positions unless Daru.lazy_update
|
314
|
+
end
|
315
|
+
|
316
|
+
# Method for updating the metadata (i.e. missing value positions) of the
|
317
|
+
# vector after assignment/deletion etc. are complete. This is provided so that
|
318
|
+
# time is not wasted in creating the metadata for the vector each time
|
319
|
+
# assignment/deletion of elements is done. Updating data this way is called
|
320
|
+
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
321
|
+
def update
|
322
|
+
if Daru.lazy_update
|
323
|
+
set_missing_positions
|
324
|
+
end
|
186
325
|
end
|
187
326
|
|
188
327
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -199,21 +338,20 @@ module Daru
|
|
199
338
|
end
|
200
339
|
end
|
201
340
|
|
202
|
-
def
|
203
|
-
|
341
|
+
def head q=10
|
342
|
+
self[0..(q-1)]
|
204
343
|
end
|
205
344
|
|
206
|
-
def
|
207
|
-
|
345
|
+
def tail q=10
|
346
|
+
self[(@size - q - 1)..(@size-1)]
|
208
347
|
end
|
209
348
|
|
210
|
-
|
211
|
-
|
349
|
+
# Reports whether missing data is present in the Vector.
|
350
|
+
def has_missing_data?
|
351
|
+
!missing_positions.empty?
|
212
352
|
end
|
353
|
+
alias :flawed? :has_missing_data?
|
213
354
|
|
214
|
-
def tail q=10
|
215
|
-
self[-q..-1]
|
216
|
-
end
|
217
355
|
|
218
356
|
# Append an element to the vector by specifying the element and index
|
219
357
|
def concat element, index=nil
|
@@ -231,8 +369,10 @@ module Daru
|
|
231
369
|
end
|
232
370
|
@data[@index[index]] = element
|
233
371
|
set_size
|
234
|
-
|
372
|
+
set_missing_positions unless Daru.lazy_update
|
235
373
|
end
|
374
|
+
alias :push :concat
|
375
|
+
alias :<< :concat
|
236
376
|
|
237
377
|
# Cast a vector to a new data type.
|
238
378
|
#
|
@@ -240,11 +380,11 @@ module Daru
|
|
240
380
|
#
|
241
381
|
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
242
382
|
def cast opts={}
|
243
|
-
|
383
|
+
dt = opts[:dtype]
|
244
384
|
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
245
|
-
|
385
|
+
dt == :array or dt == :nmatrix or dt == :gsl
|
246
386
|
|
247
|
-
@data = cast_vector_to dtype
|
387
|
+
@data = cast_vector_to dt unless @dtype == dt
|
248
388
|
end
|
249
389
|
|
250
390
|
# Delete an element by value
|
@@ -264,7 +404,7 @@ module Daru
|
|
264
404
|
end
|
265
405
|
|
266
406
|
set_size
|
267
|
-
|
407
|
+
set_missing_positions unless Daru.lazy_update
|
268
408
|
end
|
269
409
|
|
270
410
|
# The type of data contained in the vector. Can be :object or :numeric. If
|
@@ -308,11 +448,19 @@ module Daru
|
|
308
448
|
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
309
449
|
end
|
310
450
|
|
451
|
+
def any? &block
|
452
|
+
@data.data.any?(&block)
|
453
|
+
end
|
454
|
+
|
455
|
+
def all? &block
|
456
|
+
@data.data.all?(&block)
|
457
|
+
end
|
458
|
+
|
311
459
|
# Sorts a vector according to its values. If a block is specified, the contents
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
#
|
460
|
+
# will be evaluated and data will be swapped whenever the block evaluates
|
461
|
+
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
462
|
+
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
463
|
+
# quick sort.
|
316
464
|
#
|
317
465
|
# == Options
|
318
466
|
#
|
@@ -323,7 +471,7 @@ module Daru
|
|
323
471
|
#
|
324
472
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
325
473
|
# # Say you want to sort these strings by length.
|
326
|
-
# v.sort { |a,b| a.length <=> b.length }
|
474
|
+
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
327
475
|
def sort opts={}, &block
|
328
476
|
opts = {
|
329
477
|
ascending: true,
|
@@ -339,18 +487,138 @@ module Daru
|
|
339
487
|
Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
|
340
488
|
end
|
341
489
|
|
342
|
-
# Just sort the data and get an Array in return using Enumerable#sort.
|
490
|
+
# Just sort the data and get an Array in return using Enumerable#sort.
|
491
|
+
# Non-destructive.
|
343
492
|
def sorted_data &block
|
344
493
|
@data.to_a.sort(&block)
|
345
494
|
end
|
346
495
|
|
347
|
-
# Returns *true* if the value passed actually exists
|
496
|
+
# Returns *true* if the value passed actually exists or is not marked as
|
497
|
+
# a *missing value*.
|
348
498
|
def exists? value
|
349
|
-
|
499
|
+
!@missing_values.has_key?(self[index_of(value)])
|
500
|
+
end
|
501
|
+
|
502
|
+
# Like map, but returns a Daru::Vector with the returned values.
|
503
|
+
def recode dt=nil, &block
|
504
|
+
return to_enum(:recode) unless block_given?
|
505
|
+
|
506
|
+
dup.recode! dt, &block
|
507
|
+
end
|
508
|
+
|
509
|
+
# Destructive version of #recode.
|
510
|
+
def recode! dt=nil, &block
|
511
|
+
return to_enum(:recode!) unless block_given?
|
512
|
+
|
513
|
+
@data.map!(&block).data
|
514
|
+
@data = cast_vector_to(dt || @dtype)
|
515
|
+
self
|
516
|
+
end
|
517
|
+
|
518
|
+
def delete_if &block
|
519
|
+
return to_enum(:delete_if) unless block_given?
|
520
|
+
|
521
|
+
keep_e = []
|
522
|
+
keep_i = []
|
523
|
+
each_with_index do |n, i|
|
524
|
+
if yield(n)
|
525
|
+
keep_e << n
|
526
|
+
keep_i << i
|
527
|
+
end
|
528
|
+
end
|
529
|
+
|
530
|
+
@data = cast_vector_to @dtype, keep_e
|
531
|
+
@index = @index.is_a?(MultiIndex) ? MultiIndex.new(keep_i) : Index.new(keep_i)
|
532
|
+
set_missing_positions unless Daru.lazy_update
|
533
|
+
set_size
|
534
|
+
|
535
|
+
self
|
536
|
+
end
|
537
|
+
|
538
|
+
# Reports all values that don't comply with a condition.
|
539
|
+
# Returns a hash with the index of data and the invalid data.
|
540
|
+
def verify &block
|
541
|
+
h = {}
|
542
|
+
(0...size).each do |i|
|
543
|
+
if !(yield @data[i])
|
544
|
+
h[i] = @data[i]
|
545
|
+
end
|
546
|
+
end
|
547
|
+
|
548
|
+
h
|
549
|
+
end
|
550
|
+
|
551
|
+
# Return an Array with the data split by a separator.
|
552
|
+
# a=Daru::Vector.new(["a,b","c,d","a,b","d"])
|
553
|
+
# a.splitted
|
554
|
+
# =>
|
555
|
+
# [["a","b"],["c","d"],["a","b"],["d"]]
|
556
|
+
def splitted sep=","
|
557
|
+
@data.map do |s|
|
558
|
+
if s.nil?
|
559
|
+
nil
|
560
|
+
elsif s.respond_to? :split
|
561
|
+
s.split sep
|
562
|
+
else
|
563
|
+
[s]
|
564
|
+
end
|
565
|
+
end
|
566
|
+
end
|
567
|
+
|
568
|
+
# Returns a hash of Vectors, defined by the different values
|
569
|
+
# defined on the fields
|
570
|
+
# Example:
|
571
|
+
#
|
572
|
+
# a=Daru::Vector.new(["a,b","c,d","a,b"])
|
573
|
+
# a.split_by_separator
|
574
|
+
# => {"a"=>#<Daru::Vector:0x7f2dbcc09d88
|
575
|
+
# @data=[1, 0, 1]>,
|
576
|
+
# "b"=>#<Daru::Vector:0x7f2dbcc09c48
|
577
|
+
# @data=[1, 0, 1]>,
|
578
|
+
# "c"=>#<Daru::Vector:0x7f2dbcc09b08
|
579
|
+
# @data=[0, 1, 0]>}
|
580
|
+
#
|
581
|
+
def split_by_separator sep=","
|
582
|
+
split_data = splitted sep
|
583
|
+
factors = split_data.flatten.uniq.compact
|
584
|
+
|
585
|
+
out = factors.inject({}) do |h,x|
|
586
|
+
h[x] = []
|
587
|
+
h
|
588
|
+
end
|
589
|
+
|
590
|
+
split_data.each do |r|
|
591
|
+
if r.nil?
|
592
|
+
factors.each do |f|
|
593
|
+
out[f].push(nil)
|
594
|
+
end
|
595
|
+
else
|
596
|
+
factors.each do |f|
|
597
|
+
out[f].push(r.include?(f) ? 1:0)
|
598
|
+
end
|
599
|
+
end
|
600
|
+
end
|
601
|
+
|
602
|
+
out.inject({}) do |s,v|
|
603
|
+
s[v[0]] = Daru::Vector.new v[1]
|
604
|
+
s
|
605
|
+
end
|
606
|
+
end
|
607
|
+
|
608
|
+
def split_by_separator_freq(sep=",")
|
609
|
+
split_by_separator(sep).inject({}) do |a,v|
|
610
|
+
a[v[0]] = v[1].inject { |s,x| s+x.to_i }
|
611
|
+
a
|
612
|
+
end
|
613
|
+
end
|
614
|
+
|
615
|
+
def reset_index!
|
616
|
+
@index = Daru::Index.new(Array.new(size) { |i| i })
|
617
|
+
self
|
350
618
|
end
|
351
619
|
|
352
620
|
# Returns a vector which has *true* in the position where the element in self
|
353
|
-
#
|
621
|
+
# is nil, and false otherwise.
|
354
622
|
#
|
355
623
|
# == Usage
|
356
624
|
#
|
@@ -383,26 +651,34 @@ module Daru
|
|
383
651
|
end
|
384
652
|
|
385
653
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
386
|
-
#
|
654
|
+
# See #replace_nils for non-destructive version
|
387
655
|
#
|
388
656
|
# == Arguments
|
389
657
|
#
|
390
658
|
# * +replacement+ - The value which should replace all nils
|
391
659
|
def replace_nils! replacement
|
392
|
-
|
660
|
+
missing_positions.each do |idx|
|
393
661
|
self[idx] = replacement
|
394
662
|
end
|
395
663
|
|
396
664
|
self
|
397
665
|
end
|
398
666
|
|
667
|
+
def detach_index
|
668
|
+
Daru::DataFrame.new({
|
669
|
+
index: @index.to_a.map(&:to_s),
|
670
|
+
vector: @data.to_a
|
671
|
+
})
|
672
|
+
end
|
673
|
+
|
399
674
|
# Non-destructive version of #replace_nils!
|
400
675
|
def replace_nils replacement
|
401
676
|
self.dup.replace_nils!(replacement)
|
402
677
|
end
|
403
678
|
|
679
|
+
# number of non-missing elements
|
404
680
|
def n_valid
|
405
|
-
@size
|
681
|
+
@size - missing_positions.size
|
406
682
|
end
|
407
683
|
|
408
684
|
# Returns *true* if an index exists
|
@@ -425,6 +701,20 @@ module Daru
|
|
425
701
|
end
|
426
702
|
end
|
427
703
|
|
704
|
+
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
705
|
+
# the stored GSL::Vector object.
|
706
|
+
def to_gsl
|
707
|
+
if Daru.has_gsl?
|
708
|
+
if dtype == :gsl
|
709
|
+
return @data.data
|
710
|
+
else
|
711
|
+
GSL::Vector.alloc only_valid(:array).to_a
|
712
|
+
end
|
713
|
+
else
|
714
|
+
raise NoMethodError, "Install gsl-nmatrix for access to this functionality."
|
715
|
+
end
|
716
|
+
end
|
717
|
+
|
428
718
|
# Convert to hash. Hash keys are indexes and values are the corresponding elements
|
429
719
|
def to_hash
|
430
720
|
@index.inject({}) do |hsh, index|
|
@@ -446,12 +736,24 @@ module Daru
|
|
446
736
|
# Convert to html for iruby
|
447
737
|
def to_html threshold=30
|
448
738
|
name = @name || 'nil'
|
449
|
-
html =
|
739
|
+
html = "<table>" +
|
740
|
+
"<tr>" +
|
741
|
+
"<th colspan=\"2\">" +
|
742
|
+
"Daru::Vector:#{self.object_id} " + " size: #{size}" +
|
743
|
+
"</th>" +
|
744
|
+
"</tr>"
|
745
|
+
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
450
746
|
@index.each_with_index do |index, num|
|
451
747
|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
452
748
|
|
453
749
|
if num > threshold
|
454
750
|
html += '<tr><td>...</td><td>...</td></tr>'
|
751
|
+
|
752
|
+
last_index = @index.to_a.last
|
753
|
+
html += '<tr>' +
|
754
|
+
'<td>' + last_index.to_s + '</td>' +
|
755
|
+
'<td>' + self[last_index].to_s + '</td>' +
|
756
|
+
'</tr>'
|
455
757
|
break
|
456
758
|
end
|
457
759
|
end
|
@@ -464,11 +766,45 @@ module Daru
|
|
464
766
|
to_html
|
465
767
|
end
|
466
768
|
|
769
|
+
# Create a summary of the Vector using Report Builder.
|
770
|
+
def summary(method = :to_text)
|
771
|
+
ReportBuilder.new(no_title: true).add(self).send(method)
|
772
|
+
end
|
773
|
+
|
774
|
+
def report_building b
|
775
|
+
b.section(:name => name) do |s|
|
776
|
+
s.text "n :#{size}"
|
777
|
+
s.text "n valid:#{n_valid}"
|
778
|
+
if @type == :object
|
779
|
+
s.text "factors: #{factors.to_a.join(',')}"
|
780
|
+
s.text "mode: #{mode}"
|
781
|
+
|
782
|
+
s.table(:name => "Distribution") do |t|
|
783
|
+
frequencies.sort_by { |a| a.to_s }.each do |k,v|
|
784
|
+
key = @index.include?(k) ? @index[k] : k
|
785
|
+
t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
|
786
|
+
end
|
787
|
+
end
|
788
|
+
end
|
789
|
+
|
790
|
+
s.text "median: #{median.to_s}" if (@type==:numeric or @type==:numeric)
|
791
|
+
if @type==:numeric
|
792
|
+
s.text "mean: %0.4f" % mean
|
793
|
+
if sd
|
794
|
+
s.text "std.dev.: %0.4f" % sd
|
795
|
+
s.text "std.err.: %0.4f" % se
|
796
|
+
s.text "skew: %0.4f" % skew
|
797
|
+
s.text "kurtosis: %0.4f" % kurtosis
|
798
|
+
end
|
799
|
+
end
|
800
|
+
end
|
801
|
+
end
|
802
|
+
|
467
803
|
# Overrides original inspect for pretty printing in irb
|
468
804
|
def inspect spacing=20, threshold=15
|
469
805
|
longest = [@name.to_s.size,
|
470
|
-
@index.to_a.map(&:to_s).map(&:size).max,
|
471
|
-
@data .map(&:to_s).map(&:size).max,
|
806
|
+
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
807
|
+
(@data .map(&:to_s).map(&:size).max || 0),
|
472
808
|
'nil'.size].max
|
473
809
|
|
474
810
|
content = ""
|
@@ -503,6 +839,11 @@ module Daru
|
|
503
839
|
#
|
504
840
|
# @param new_name [Symbol] The new name.
|
505
841
|
def rename new_name
|
842
|
+
if new_name.is_a?(Numeric)
|
843
|
+
@name = new_name
|
844
|
+
return
|
845
|
+
end
|
846
|
+
|
506
847
|
@name = new_name.to_sym
|
507
848
|
end
|
508
849
|
|
@@ -511,12 +852,176 @@ module Daru
|
|
511
852
|
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
512
853
|
end
|
513
854
|
|
855
|
+
# == Bootstrap
|
856
|
+
# Generate +nr+ resamples (with replacement) of size +s+
|
857
|
+
# from vector, computing each estimate from +estimators+
|
858
|
+
# over each resample.
|
859
|
+
# +estimators+ could be
|
860
|
+
# a) Hash with variable names as keys and lambdas as values
|
861
|
+
# a.bootstrap(:log_s2=>lambda {|v| Math.log(v.variance)},1000)
|
862
|
+
# b) Array with names of method to bootstrap
|
863
|
+
# a.bootstrap([:mean, :sd],1000)
|
864
|
+
# c) A single method to bootstrap
|
865
|
+
# a.bootstrap(:mean, 1000)
|
866
|
+
# If s is nil, is set to vector size by default.
|
867
|
+
#
|
868
|
+
# Returns a DataFrame where each vector is a vector
|
869
|
+
# of length +nr+ containing the computed resample estimates.
|
870
|
+
def bootstrap(estimators, nr, s=nil)
  sample_size = s || size
  lambdas, names, results = prepare_bootstrap(estimators)

  # Draw +nr+ resamples and record every estimator's value on each of them.
  nr.times do
    resample = sample_with_replacement(sample_size)
    names.each { |est| results[est] << lambdas[est].call(resample) }
  end

  # Wrap each list of estimates in a Vector before assembling the DataFrame.
  names.each { |est| results[est] = Daru::Vector.new(results[est]) }

  Daru::DataFrame.new results
end
|
887
|
+
|
888
|
+
# == Jackknife
|
889
|
+
# Returns a DataFrame of jackknife delete-+k+ pseudovalues for +estimators+.
|
890
|
+
# +estimators+ could be:
|
891
|
+
# a) Hash with variable names as keys and lambdas as values
|
892
|
+
# a.jackknife(:log_s2=>lambda {|v| Math.log(v.variance)})
|
893
|
+
# b) Array with method names to jacknife
|
894
|
+
# a.jackknife([:mean, :sd])
|
895
|
+
# c) A single method to jacknife
|
896
|
+
# a.jackknife(:mean)
|
897
|
+
# +k+ represents the block size for block jackknife. By default
|
898
|
+
# it is set to 1, for the classic delete-one jackknife.
|
899
|
+
#
|
900
|
+
# Returns a DataFrame where each vector is a vector
|
901
|
+
# of length +cases+/+k+ containing the computed jackknife estimates.
|
902
|
+
#
|
903
|
+
# == Reference:
|
904
|
+
# * Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
|
905
|
+
def jackknife(estimators, k=1)
  raise "n should be divisible by k:#{k}" unless (size % k).zero?

  nb = (size / k).to_i
  lambdas, names, pseudovalues = prepare_bootstrap(estimators)

  # Estimates computed on the complete vector, keyed by estimator name.
  full_estimates = {}
  names.each { |est| full_estimates[est] = lambdas[est].call(self) }

  nb.times do |block|
    # Drop the block-th run of k consecutive values from a copy of the data.
    remaining = @data.dup
    remaining.slice!(block*k, k)
    subsample = Daru::Vector.new remaining

    names.each do |est|
      # Pseudovalue: nb * full estimate - (nb - 1) * leave-block-out estimate.
      partial = lambdas[est].call(subsample)
      pseudovalues[est].push(nb * full_estimates[est] - (nb-1) * partial)
    end
  end

  names.each { |est| pseudovalues[est] = Daru::Vector.new pseudovalues[est] }
  Daru::DataFrame.new pseudovalues
end
|
933
|
+
|
934
|
+
# Creates a new vector consisting only of non-nil data
|
935
|
+
#
|
936
|
+
# == Arguments
|
937
|
+
#
|
938
|
+
# @param as_a [Symbol] Passing :array will return only the elements
|
939
|
+
# as an Array. Otherwise will return a Daru::Vector.
|
940
|
+
#
|
941
|
+
# @param duplicate [Boolean] In case no missing data is found in the
|
942
|
+
# vector, setting this to false will return the same vector.
|
943
|
+
# Otherwise, a duplicate will be returned irrespective of
|
944
|
+
# presence of missing data.
|
945
|
+
def only_valid as_a=:vector, duplicate=true
  wants_vector = (as_a == :vector)

  # Nothing to filter: hand back the data as-is (copy, self or plain Array).
  unless has_missing_data?
    return to_a unless wants_vector
    return duplicate ? dup : self
  end

  # Keep only the index entries that are not flagged as missing.
  kept_index  = @index.to_a - missing_positions
  kept_values = kept_index.map { |label| self[label] }

  return kept_values unless wants_vector

  Daru::Vector.new kept_values, index: kept_index, name: @name, dtype: dtype
end
|
959
|
+
|
960
|
+
# Returns a Vector containing only missing data (preserves indexes).
|
961
|
+
# Selects the entries located at +missing_positions+.
#
# @param as_a [Symbol] :vector (default) returns the result of indexing
#   this vector with the missing positions; any other value returns that
#   result converted with #to_a. Unrecognised values used to yield nil;
#   they now behave like :array, matching #only_valid.
def only_missing as_a=:vector
  missing = self[*missing_positions]
  as_a == :vector ? missing : missing.to_a
end
|
968
|
+
|
969
|
+
# Returns a Vector with only numerical data. Missing data is included
|
970
|
+
# but non-Numeric objects are excluded. Preserves index.
|
971
|
+
def only_numerics
  numeric_indexes = []

  # Collect the index of every entry that is Numeric or a registered
  # missing value; everything else is left out.
  each_with_index do |value, idx|
    next unless value.is_a?(Numeric) || @missing_values.key?(value)
    numeric_indexes.push(idx)
  end

  self[*numeric_indexes]
end
|
980
|
+
|
981
|
+
# Returns the database type for the vector, according to its content
|
982
|
+
# Guesses a SQL column type from the string form of the stored values.
#
# Checks, in order: any value containing a dd-dd-dddd or dddd-dd-dd pattern
# gives "DATE"; any value containing a character other than a digit, "e",
# "." or "-" gives "VARCHAR (255)"; any value containing "." gives
# "DOUBLE"; otherwise "INTEGER".
#
# @param dbs [Symbol] accepted for API compatibility; it is not consulted.
# @return [String] the column type name.
def db_type(dbs=:mysql)
  # any? rather than find: find returns the matching element itself, so a
  # matching value that is falsy (e.g. false) was treated as "no match".
  matches = lambda { |pattern| @data.any? { |v| v.to_s =~ pattern } }

  if matches.call(/\d{2,2}-\d{2,2}-\d{4,4}/) || matches.call(/\d{4,4}-\d{2,2}-\d{2,2}/)
    "DATE"
  elsif matches.call(/[^0-9e.-]/)
    "VARCHAR (255)"
  elsif matches.call(/\./)
    "DOUBLE"
  else
    "INTEGER"
  end
end
|
994
|
+
|
514
995
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
515
|
-
#
|
996
|
+
# values with nils.
|
516
997
|
def clone_structure
  # One nil placeholder per element; the name is reused and the index copied.
  placeholders = [nil] * @size
  Daru::Vector.new(placeholders, name: @name, index: @index.dup)
end
|
519
1000
|
|
1001
|
+
# Save the vector to a file
|
1002
|
+
#
|
1003
|
+
# == Arguments
|
1004
|
+
#
|
1005
|
+
# * filename - Path of file where the vector is to be saved
|
1006
|
+
# Delegates to Daru::IO.save, passing this vector and +filename+.
def save(filename)
  Daru::IO.save(self, filename)
end
|
1009
|
+
|
1010
|
+
def _dump(depth) # :nodoc:
|
1011
|
+
Marshal.dump({
|
1012
|
+
data: @data.to_a,
|
1013
|
+
dtype: @dtype,
|
1014
|
+
name: @name,
|
1015
|
+
index: @index,
|
1016
|
+
missing_values: @missing_values})
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
def self._load(data) # :nodoc:
  # Marshal hook: rebuild a vector from the Hash written by #_dump.
  state = Marshal.load(data)

  Daru::Vector.new(
    state[:data],
    index:          state[:index],
    name:           state[:name],
    dtype:          state[:dtype],
    missing_values: state[:missing_values]
  )
end
|
1024
|
+
|
520
1025
|
# Returns the receiver itself; any arguments are accepted and ignored.
def daru_vector(*name)
  self
end
|
@@ -535,6 +1040,26 @@ module Daru
|
|
535
1040
|
|
536
1041
|
private
|
537
1042
|
|
1043
|
+
# For an array or hash of estimators methods, returns
|
1044
|
+
# an array with three elements
|
1045
|
+
# 1.- A hash with estimators names as keys and lambdas as values
|
1046
|
+
# 2.- An array with estimators names
|
1047
|
+
# 3.- A Hash with estimators names as keys and empty arrays as values
|
1048
|
+
def prepare_bootstrap(estimators)
  lambdas =
    if estimators.is_a?(Hash)
      # Already a name => lambda mapping; use it as given.
      estimators
    else
      # A bare method name or a list of names: build one lambda per name
      # that wraps its argument in a Vector and calls that method on it.
      names = estimators.is_a?(Array) ? estimators : [estimators]
      names.each_with_object({}) do |est, table|
        table[est] = lambda { |v| Daru::Vector.new(v).send(est) }
      end
    end

  # One independent, initially empty result list per estimator.
  collectors = lambdas.keys.each_with_object({}) { |key, acc| acc[key] = [] }

  [lambdas, lambdas.keys, collectors]
end
|
1062
|
+
|
538
1063
|
def quick_sort vector, index, order, &block
|
539
1064
|
recursive_quick_sort vector, index, order, 0, @size-1, &block
|
540
1065
|
[vector, index]
|
@@ -599,18 +1124,17 @@ module Daru
|
|
599
1124
|
end
|
600
1125
|
|
601
1126
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
602
|
-
#
|
1127
|
+
# @dtype variable is set and the underlying data type of vector changed.
|
603
1128
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
604
|
-
source = @data if source.nil?
|
605
|
-
return @data if @dtype and @dtype == dtype
|
1129
|
+
source = @data.to_a if source.nil?
|
606
1130
|
|
607
1131
|
new_vector =
|
608
1132
|
case dtype
|
609
|
-
when :array then Daru::Accessors::ArrayWrapper.new(source
|
610
|
-
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source
|
611
|
-
|
1133
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
|
1134
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1135
|
+
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
612
1136
|
when :mdarray then raise NotImplementedError, "MDArray not yet supported."
|
613
|
-
else
|
1137
|
+
else raise "Unknown dtype #{dtype}"
|
614
1138
|
end
|
615
1139
|
|
616
1140
|
@dtype = dtype || :array
|
@@ -649,12 +1173,11 @@ module Daru
|
|
649
1173
|
end
|
650
1174
|
end
|
651
1175
|
|
652
|
-
def
|
653
|
-
@
|
1176
|
+
# Rebuilds @missing_positions: the index entries whose stored value is a
# key of @missing_values.
def set_missing_positions
  @missing_positions = []
  @index.each do |label|
    next unless @missing_values.key?(self[label])
    @missing_positions.push(label)
  end
end
|
659
1182
|
|
660
1183
|
def create_index potential_index
|
@@ -669,5 +1192,17 @@ module Daru
|
|
669
1192
|
pos = index_for location
|
670
1193
|
pos ? @data[pos] : nil
|
671
1194
|
end
|
1195
|
+
|
1196
|
+
# Setup missing_values. The missing_values instance variable is set
|
1197
|
+
# as a Hash for faster lookup times.
|
1198
|
+
def set_missing_values values_arry
  # nil always counts as missing; every extra marker is added as a key.
  @missing_values = { nil => 0 }
  return unless values_arry

  values_arry.each { |marker| @missing_values[marker] = 0 }
end
|
672
1207
|
end
|
673
1208
|
end
|