daru 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.build.sh +14 -0
- data/.travis.yml +26 -4
- data/CONTRIBUTING.md +31 -0
- data/Gemfile +1 -2
- data/{History.txt → History.md} +110 -44
- data/README.md +21 -288
- data/Rakefile +1 -0
- data/daru.gemspec +12 -8
- data/lib/daru.rb +36 -1
- data/lib/daru/accessors/array_wrapper.rb +8 -3
- data/lib/daru/accessors/gsl_wrapper.rb +113 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +6 -17
- data/lib/daru/core/group_by.rb +0 -1
- data/lib/daru/dataframe.rb +1192 -83
- data/lib/daru/extensions/rserve.rb +21 -0
- data/lib/daru/index.rb +14 -0
- data/lib/daru/io/io.rb +170 -8
- data/lib/daru/maths/arithmetic/dataframe.rb +4 -3
- data/lib/daru/maths/arithmetic/vector.rb +4 -4
- data/lib/daru/maths/statistics/dataframe.rb +48 -27
- data/lib/daru/maths/statistics/vector.rb +215 -33
- data/lib/daru/monkeys.rb +53 -7
- data/lib/daru/multi_index.rb +21 -4
- data/lib/daru/plotting/dataframe.rb +83 -25
- data/lib/daru/plotting/vector.rb +9 -10
- data/lib/daru/vector.rb +596 -61
- data/lib/daru/version.rb +3 -0
- data/spec/accessors/wrappers_spec.rb +51 -0
- data/spec/core/group_by_spec.rb +0 -2
- data/spec/daru_spec.rb +58 -0
- data/spec/dataframe_spec.rb +768 -73
- data/spec/extensions/rserve_spec.rb +52 -0
- data/spec/fixtures/bank2.dat +200 -0
- data/spec/fixtures/repeated_fields.csv +7 -0
- data/spec/fixtures/scientific_notation.csv +4 -0
- data/spec/fixtures/test_xls.xls +0 -0
- data/spec/io/io_spec.rb +161 -24
- data/spec/math/arithmetic/dataframe_spec.rb +26 -7
- data/spec/math/arithmetic/vector_spec.rb +8 -0
- data/spec/math/statistics/dataframe_spec.rb +16 -1
- data/spec/math/statistics/vector_spec.rb +215 -47
- data/spec/spec_helper.rb +21 -2
- data/spec/vector_spec.rb +368 -12
- metadata +99 -16
- data/lib/version.rb +0 -3
- data/notebooks/grouping_splitting_pivots.ipynb +0 -529
- data/notebooks/intro_with_music_data_.ipynb +0 -303
data/lib/daru/monkeys.rb
CHANGED
@@ -1,5 +1,41 @@
|
|
1
1
|
class Array
|
2
|
-
|
2
|
+
# Recode repeated values on an array, adding the number of repetition
|
3
|
+
# at the end
|
4
|
+
# Example:
|
5
|
+
# a=%w{a b c c d d d e}
|
6
|
+
# a.recode_repeated
|
7
|
+
# => ["a","b","c_1","c_2","d_1","d_2","d_3","e"]
|
8
|
+
def recode_repeated
  # Nothing repeated: hand back the receiver itself (not a copy).
  return self if size == uniq.size

  # Count occurrences with Hash keys, i.e. the same eql?/hash equality that
  # #uniq uses above. The same table then decides whether a value is a
  # repeat, so values that are == but not eql? (1 and 1.0) never reach a
  # counter that was not initialised.
  totals = each_with_object(Hash.new(0)) { |value, acc| acc[value] += 1 }

  # Running occurrence number for each repeated value.
  seen = Hash.new(0)

  map do |value|
    if totals[value] > 1
      seen[value] += 1
      sprintf('%s_%d', value, seen[value])
    else
      value
    end
  end
end
|
37
|
+
|
38
|
+
def daru_vector name=nil, index=nil, dtype=:array
|
3
39
|
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
4
40
|
end
|
5
41
|
|
@@ -11,8 +47,8 @@ class Array
|
|
11
47
|
end
|
12
48
|
|
13
49
|
class Range
|
14
|
-
def daru_vector name=nil, index=nil, dtype
|
15
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
50
|
+
def daru_vector name=nil, index=nil, dtype=:array
|
51
|
+
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
16
52
|
end
|
17
53
|
|
18
54
|
alias_method :dv, :daru_vector
|
@@ -23,8 +59,8 @@ class Range
|
|
23
59
|
end
|
24
60
|
|
25
61
|
class Hash
|
26
|
-
def daru_vector index=nil, dtype
|
27
|
-
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype:
|
62
|
+
def daru_vector index=nil, dtype=:array
|
63
|
+
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype: dtype
|
28
64
|
end
|
29
65
|
|
30
66
|
alias_method :dv, :daru_vector
|
@@ -32,7 +68,7 @@ end
|
|
32
68
|
|
33
69
|
class NMatrix
|
34
70
|
def daru_vector name=nil, index=nil, dtype=NMatrix
|
35
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
71
|
+
Daru::Vector.new self, name: name, index: index, dtype: :nmatrix
|
36
72
|
end
|
37
73
|
|
38
74
|
alias_method :dv, :daru_vector
|
@@ -40,7 +76,7 @@ end
|
|
40
76
|
|
41
77
|
class MDArray
|
42
78
|
def daru_vector name=nil, index=nil, dtype=MDArray
|
43
|
-
Daru::Vector.new self, name: name, index: index, dtype:
|
79
|
+
Daru::Vector.new self, name: name, index: index, dtype: :mdarray
|
44
80
|
end
|
45
81
|
|
46
82
|
alias_method :dv, :daru_vector
|
@@ -58,4 +94,14 @@ class Matrix
|
|
58
94
|
e / other.to_a.flatten[index]
|
59
95
|
end
|
60
96
|
end
|
97
|
+
end
|
98
|
+
|
99
|
+
class String
|
100
|
+
# Returns true when the whole string looks like a number: optional leading
# minus, digits, an optional "." or "," decimal separator with further
# digits, and an optional exponent ("e" followed by an optional minus and
# digits). Returns false otherwise.
def is_number?
  # \A and \Z anchor the entire string. The line anchors ^ and $ would accept
  # any string that merely contains one numeric line (e.g. "abc\n12").
  # \Z still tolerates a single trailing newline, like $ did.
  !(self =~ /\A-?\d+[,.]?\d*(e-?\d+)?\Z/).nil?
end
|
61
107
|
end
|
data/lib/daru/multi_index.rb
CHANGED
@@ -17,12 +17,12 @@ module Daru
|
|
17
17
|
attr_reader :values
|
18
18
|
|
19
19
|
# Initialize a MultiIndex by passing a tuple of indexes. The order assigned
|
20
|
-
#
|
21
|
-
#
|
20
|
+
# to the multi index corresponds to the position of the tuple in the array
|
21
|
+
# of tuples.
|
22
22
|
#
|
23
23
|
# Although you can create your own hierarchically indexed Vectors and DataFrames,
|
24
|
-
#
|
25
|
-
#
|
24
|
+
# this class currently contains minimal error checking and is mainly used
|
25
|
+
# internally for summarizing, splitting and grouping of data.
|
26
26
|
#
|
27
27
|
# == Arguments
|
28
28
|
#
|
@@ -68,6 +68,19 @@ module Daru
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
+
# Returns a new Daru::MultiIndex made of this index's tuples followed by the
# tuple given in +other+ (nested arrays are flattened first).
#
# Raises ArgumentError when the flattened tuple does not have the same number
# of elements as every existing tuple.
def + other
  # Flatten into a copy so the caller's array is never modified.
  tuple = other.flatten
  tuples = to_a
  raise ArgumentError, "Incomplete tuple #{tuple}" unless
    tuples.all? { |t| t.size == tuple.size }

  Daru::MultiIndex.new(tuples << tuple)
end
|
79
|
+
|
80
|
+
def empty?
|
81
|
+
@relation_hash.empty?
|
82
|
+
end
|
83
|
+
|
71
84
|
# Compare two MultiIndex objects for equality based on the contents of their
|
72
85
|
# relation hashes. Does not take object_id into account.
|
73
86
|
def == other
|
@@ -108,6 +121,10 @@ module Daru
|
|
108
121
|
tuple.empty? ? nil : tuple
|
109
122
|
end
|
110
123
|
|
124
|
+
def size
|
125
|
+
to_a.size
|
126
|
+
end
|
127
|
+
|
111
128
|
private
|
112
129
|
|
113
130
|
# Deep compare two hashes
|
@@ -1,9 +1,3 @@
|
|
1
|
-
begin
|
2
|
-
require 'nyaplot'
|
3
|
-
rescue LoadError => e
|
4
|
-
puts "#{e}"
|
5
|
-
end
|
6
|
-
|
7
1
|
module Daru
|
8
2
|
module Plotting
|
9
3
|
module DataFrame
|
@@ -12,36 +6,100 @@ module Daru
|
|
12
6
|
# to the block, if it is specified. See the nyaplot docs for info on how to
|
13
7
|
# further use these objects.
|
14
8
|
#
|
9
|
+
# Detailed instructions on use of the plotting API can be found in the
|
10
|
+
# notebooks whose links you can find in the README.
|
11
|
+
#
|
15
12
|
# == Options
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
# will keep vector :a as the X axis and :b as the Y axis. Passing [:a]
|
21
|
-
# keep :a as the X axis and plot the frequency with which :a appears
|
22
|
-
# on the Y axis.
|
23
|
-
# +:frame+ - Pass this as *true* to disable plotting the graph directly
|
24
|
-
# and instead manually create Nyaplot::Frame object inside the block using
|
25
|
-
# the Nyaplot::Plot object for plotting one or many graphs in a frame.
|
13
|
+
#
|
14
|
+
# * +:type+ - Type of plot. Can be :scatter, :bar, :histogram, :line or :box.
|
15
|
+
# * +:x+ - Vector to be used for X co-ordinates.
|
16
|
+
# * +:y+ - Vector to be used for Y co-ordinates.
|
26
17
|
#
|
27
18
|
# == Usage
|
28
|
-
#
|
29
|
-
# df.
|
19
|
+
# # Simple bar chart
|
20
|
+
# df = Daru::DataFrame.new({a:['A', 'B', 'C', 'D', 'E'], b:[10,20,30,40,50]})
|
21
|
+
# df.plot type: :bar, x: :a, y: :b
|
30
22
|
def plot opts={}
|
31
23
|
options = {
|
32
|
-
type: :scatter
|
33
|
-
frame: false,
|
34
|
-
legends: []
|
24
|
+
type: :scatter
|
35
25
|
}.merge(opts)
|
36
26
|
|
37
27
|
plot = Nyaplot::Plot.new
|
38
|
-
|
39
|
-
|
28
|
+
types = extract_option :type, options
|
29
|
+
|
30
|
+
diagram =
|
31
|
+
case
|
32
|
+
when !([:scatter, :bar, :line, :histogram] & types).empty?
|
33
|
+
if single_diagram? options
|
34
|
+
add_single_diagram plot, options
|
35
|
+
else
|
36
|
+
add_multiple_diagrams plot, options
|
37
|
+
end
|
38
|
+
when types.include?(:box)
|
39
|
+
numeric = self.only_numerics(clone: false).dup_only_valid
|
40
|
+
|
41
|
+
plot.add_with_df(
|
42
|
+
numeric.to_nyaplotdf,
|
43
|
+
:box, *numeric.vectors.to_a)
|
44
|
+
end
|
40
45
|
|
41
46
|
yield(plot, diagram) if block_given?
|
42
47
|
|
43
|
-
plot.show
|
48
|
+
plot.show
|
44
49
|
end
|
50
|
+
|
51
|
+
private
|
52
|
+
|
53
|
+
def single_diagram? options
|
54
|
+
options[:x] and options[:x].is_a?(Symbol)
|
55
|
+
end
|
56
|
+
|
57
|
+
def add_single_diagram plot, options
|
58
|
+
args = [
|
59
|
+
self.to_nyaplotdf,
|
60
|
+
options[:type],
|
61
|
+
options[:x]
|
62
|
+
]
|
63
|
+
|
64
|
+
args << options[:y] if(options[:y])
|
65
|
+
|
66
|
+
plot.add_with_df(*args)
|
67
|
+
end
|
68
|
+
|
69
|
+
def add_multiple_diagrams plot, options
|
70
|
+
types = extract_option :type, options
|
71
|
+
x_vecs = extract_option :x, options
|
72
|
+
y_vecs = extract_option :y, options
|
73
|
+
|
74
|
+
diagrams = []
|
75
|
+
nyaplot_df = self.to_nyaplotdf
|
76
|
+
total = x_vecs.size
|
77
|
+
types = types.size < total ? types*total : types
|
78
|
+
|
79
|
+
|
80
|
+
(0...total).each do |i|
|
81
|
+
diagrams << plot.add_with_df(
|
82
|
+
nyaplot_df,
|
83
|
+
types[i],
|
84
|
+
x_vecs[i],
|
85
|
+
y_vecs[i]
|
86
|
+
)
|
87
|
+
end
|
88
|
+
|
89
|
+
diagrams
|
90
|
+
end
|
91
|
+
|
92
|
+
# Fetch the value(s) configured for +opt+ as an Array.
#
# * If options[opt] is set, it is returned, wrapped in an Array unless it
#   already is one.
# * Otherwise the values of every option whose key starts with +opt+
#   (e.g. :x1, :x2 for :x) are returned, ordered by sorted key.
def extract_option opt, options
  if options[opt]
    value = options[opt]
    value.is_a?(Array) ? value : [value]
  else
    prefix = Regexp.new("\\A#{opt.to_s}")
    matching = options.keys.select { |key| key =~ prefix }
    # Use the sorted keys for the lookup, so that x1/x2/... and y1/y2/...
    # pair up by name regardless of the order they were passed in.
    matching.sort.map { |key| options[key] }
  end
end
|
102
|
+
|
45
103
|
end
|
46
104
|
end
|
47
|
-
end
|
105
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/plotting/vector.rb
CHANGED
@@ -1,9 +1,3 @@
|
|
1
|
-
begin
|
2
|
-
require 'nyaplot'
|
3
|
-
rescue LoadError => e
|
4
|
-
puts "#{e}"
|
5
|
-
end
|
6
|
-
|
7
1
|
module Daru
|
8
2
|
module Plotting
|
9
3
|
module Vector
|
@@ -27,9 +21,14 @@ module Daru
|
|
27
21
|
type: :scatter
|
28
22
|
}.merge(opts)
|
29
23
|
|
30
|
-
x_axis
|
31
|
-
plot
|
32
|
-
diagram =
|
24
|
+
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
25
|
+
plot = Nyaplot::Plot.new
|
26
|
+
diagram =
|
27
|
+
if [:box, :histogram].include? options[:type]
|
28
|
+
plot.add(options[:type], @data.to_a)
|
29
|
+
else
|
30
|
+
plot.add(options[:type], x_axis, @data.to_a)
|
31
|
+
end
|
33
32
|
|
34
33
|
yield plot, diagram if block_given?
|
35
34
|
|
@@ -37,4 +36,4 @@ module Daru
|
|
37
36
|
end
|
38
37
|
end
|
39
38
|
end
|
40
|
-
end
|
39
|
+
end if Daru.has_nyaplot?
|
data/lib/daru/vector.rb
CHANGED
@@ -5,13 +5,14 @@ require 'maths/statistics/vector.rb'
|
|
5
5
|
require 'plotting/vector.rb'
|
6
6
|
require 'accessors/array_wrapper.rb'
|
7
7
|
require 'accessors/nmatrix_wrapper.rb'
|
8
|
+
require 'accessors/gsl_wrapper.rb'
|
8
9
|
|
9
10
|
module Daru
|
10
11
|
class Vector
|
11
12
|
include Enumerable
|
12
13
|
include Daru::Maths::Arithmetic::Vector
|
13
14
|
include Daru::Maths::Statistics::Vector
|
14
|
-
include Daru::Plotting::Vector
|
15
|
+
include Daru::Plotting::Vector if Daru.has_nyaplot?
|
15
16
|
|
16
17
|
def each(&block)
|
17
18
|
return to_enum(:each) unless block_given?
|
@@ -20,37 +21,55 @@ module Daru
|
|
20
21
|
self
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
return to_enum(:
|
24
|
+
def each_index(&block)
|
25
|
+
return to_enum(:each_index) unless block_given?
|
25
26
|
|
26
|
-
@
|
27
|
+
@index.each(&block)
|
27
28
|
self
|
28
29
|
end
|
29
30
|
|
30
|
-
def
|
31
|
-
return to_enum(:
|
31
|
+
def each_with_index(&block)
|
32
|
+
return to_enum(:each_with_index) unless block_given?
|
32
33
|
|
33
|
-
|
34
|
+
@index.each { |i| yield(self[i], i) }
|
35
|
+
self
|
34
36
|
end
|
35
37
|
|
36
|
-
|
38
|
+
def map!(&block)
|
39
|
+
return to_enum(:map!) unless block_given?
|
40
|
+
@data.map!(&block)
|
41
|
+
update
|
42
|
+
self
|
43
|
+
end
|
37
44
|
|
45
|
+
# The name of the Daru::Vector. String.
|
38
46
|
attr_reader :name
|
47
|
+
# The row index. Can be either Daru::Index or Daru::MultiIndex.
|
39
48
|
attr_reader :index
|
49
|
+
# The total number of elements of the vector.
|
40
50
|
attr_reader :size
|
51
|
+
# The underlying dtype of the Vector. Can be either :array, :nmatrix or :gsl.
|
41
52
|
attr_reader :dtype
|
53
|
+
# If the dtype is :nmatrix, this attribute represents the data type of the
|
54
|
+
# underlying NMatrix object. See NMatrix docs for more details on NMatrix
|
55
|
+
# data types.
|
42
56
|
attr_reader :nm_dtype
|
43
|
-
|
57
|
+
# An Array of the positions in the vector that are being treated as 'missing'.
|
58
|
+
attr_reader :missing_positions
|
59
|
+
# Store a hash of labels for values. Supplementary only. Recommend using index
|
60
|
+
# for proper usage.
|
61
|
+
attr_accessor :labels
|
44
62
|
|
45
63
|
# Create a Vector object.
|
64
|
+
#
|
46
65
|
# == Arguments
|
47
66
|
#
|
48
|
-
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
67
|
+
# @param source[Array,Hash] - Supply elements in the form of an Array or a
|
68
|
+
# Hash. If Array, a numeric index will be created if not supplied in the
|
69
|
+
# options. Specifying more index elements than actual values in *source*
|
70
|
+
# will insert *nil* into the surplus index elements. When a Hash is specified,
|
71
|
+
# the keys of the Hash are taken as the index elements and the corresponding
|
72
|
+
# values as the values that populate the vector.
|
54
73
|
#
|
55
74
|
# == Options
|
56
75
|
#
|
@@ -58,10 +77,14 @@ module Daru
|
|
58
77
|
#
|
59
78
|
# * +:index+ - Index of the vector
|
60
79
|
#
|
61
|
-
# * +:dtype+ - The underlying data type. Can be :array
|
80
|
+
# * +:dtype+ - The underlying data type. Can be :array, :nmatrix or :gsl.
|
81
|
+
# Default :array.
|
62
82
|
#
|
63
83
|
# * +:nm_dtype+ - For NMatrix, the data type of the numbers. See the NMatrix docs for
|
64
|
-
#
|
84
|
+
# further information on supported data type.
|
85
|
+
#
|
86
|
+
# * +:missing_values+ - An Array of the values that are to be treated as 'missing'.
|
87
|
+
# nil is the default missing value.
|
65
88
|
#
|
66
89
|
# == Usage
|
67
90
|
#
|
@@ -79,7 +102,7 @@ module Daru
|
|
79
102
|
name = opts[:name]
|
80
103
|
set_name name
|
81
104
|
|
82
|
-
@data = cast_vector_to(opts[:dtype], source, opts[:nm_dtype])
|
105
|
+
@data = cast_vector_to(opts[:dtype] || :array, source, opts[:nm_dtype])
|
83
106
|
@index = create_index(index || @data.size)
|
84
107
|
|
85
108
|
if @index.size > @data.size
|
@@ -90,10 +113,81 @@ module Daru
|
|
90
113
|
end
|
91
114
|
|
92
115
|
@possibly_changed_type = true
|
93
|
-
|
116
|
+
set_missing_values opts[:missing_values]
|
117
|
+
set_missing_positions
|
94
118
|
set_size
|
95
119
|
end
|
96
120
|
|
121
|
+
# Create a new vector by specifying the size and an optional value
|
122
|
+
# and block to generate values.
|
123
|
+
#
|
124
|
+
# == Description
|
125
|
+
#
|
126
|
+
# The *new_with_size* class method lets you create a Daru::Vector
|
127
|
+
# by specifying the size as the argument. The optional block, if
|
128
|
+
# supplied, is run once for populating each element in the Vector.
|
129
|
+
#
|
130
|
+
# The result of each run of the block is the value that is ultimately
|
131
|
+
# assigned to that position in the Vector.
|
132
|
+
#
|
133
|
+
# == Options
|
134
|
+
# :value
|
135
|
+
# All the rest like .new
|
136
|
+
def self.new_with_size n, opts={}, &block
  # :value is consumed here and must not reach Daru::Vector.new. Remove it
  # from a copy so the caller's options hash is left exactly as passed in.
  vector_opts = opts.dup
  value = vector_opts.delete :value

  # With a block, each element is the block's result for its position;
  # without one, every element is the :value option (nil if not given).
  data = n.times.map { |i| block ? block.call(i) : value }

  Daru::Vector.new data, vector_opts
end
|
146
|
+
|
147
|
+
# Create a vector using (almost) any object
|
148
|
+
# * Array: flattened
|
149
|
+
# * Range: transformed using to_a
|
150
|
+
# * Daru::Vector
|
151
|
+
# * Numeric and string values
|
152
|
+
#
|
153
|
+
# == Description
|
154
|
+
#
|
155
|
+
# The `Vector.[]` class method creates a vector from almost any
|
156
|
+
# object that has a `#to_a` method defined on it. It is similar
|
157
|
+
# to R's `c` method.
|
158
|
+
#
|
159
|
+
# == Usage
|
160
|
+
#
|
161
|
+
# a = Daru::Vector[1,2,3,4,6..10]
|
162
|
+
# #=>
|
163
|
+
# # <Daru::Vector:99448510 @name = nil @size = 9 >
|
164
|
+
# # nil
|
165
|
+
# # 0 1
|
166
|
+
# # 1 2
|
167
|
+
# # 2 3
|
168
|
+
# # 3 4
|
169
|
+
# # 4 6
|
170
|
+
# # 5 7
|
171
|
+
# # 6 8
|
172
|
+
# # 7 9
|
173
|
+
# # 8 10
|
174
|
+
def self.[](*args)
|
175
|
+
values = []
|
176
|
+
args.each do |a|
|
177
|
+
case a
|
178
|
+
when Array
|
179
|
+
values.concat a.flatten
|
180
|
+
when Daru::Vector
|
181
|
+
values.concat a.to_a
|
182
|
+
when Range
|
183
|
+
values.concat a.to_a
|
184
|
+
else
|
185
|
+
values << a
|
186
|
+
end
|
187
|
+
end
|
188
|
+
Daru::Vector.new(values)
|
189
|
+
end
|
190
|
+
|
97
191
|
# Get one or more elements with specified index or a range.
|
98
192
|
#
|
99
193
|
# == Usage
|
@@ -106,6 +200,7 @@ module Daru
|
|
106
200
|
# # For vectors employing hierarchical multi index
|
107
201
|
#
|
108
202
|
def [](*indexes)
|
203
|
+
indexes.map! { |e| e.respond_to?(:to_sym) ? e.to_sym : e }
|
109
204
|
location = indexes[0]
|
110
205
|
if @index.is_a?(MultiIndex)
|
111
206
|
result =
|
@@ -158,6 +253,19 @@ module Daru
|
|
158
253
|
end
|
159
254
|
end
|
160
255
|
|
256
|
+
# Just like in Hashes, you can specify the index label of the Daru::Vector
|
257
|
+
# and assign an element at that place in the Daru::Vector.
|
258
|
+
#
|
259
|
+
# == Usage
|
260
|
+
#
|
261
|
+
# v = Daru::Vector.new([1,2,3], index: [:a, :b, :c])
|
262
|
+
# v[:a] = 999
|
263
|
+
# #=>
|
264
|
+
# ##<Daru::Vector:90257920 @name = nil @size = 3 >
|
265
|
+
# # nil
|
266
|
+
# # a 999
|
267
|
+
# # b 2
|
268
|
+
# # c 3
|
161
269
|
def []=(*location, value)
|
162
270
|
cast(dtype: :array) if value.nil? and dtype != :array
|
163
271
|
|
@@ -182,7 +290,38 @@ module Daru
|
|
182
290
|
end
|
183
291
|
|
184
292
|
set_size
|
185
|
-
|
293
|
+
set_missing_positions unless Daru.lazy_update
|
294
|
+
end
|
295
|
+
|
296
|
+
# The values to be treated as 'missing'. *nil* is the default missing
|
297
|
+
# type. To set missing values see the missing_values= method.
|
298
|
+
def missing_values
|
299
|
+
@missing_values.keys
|
300
|
+
end
|
301
|
+
|
302
|
+
# Assign an Array to treat certain values as 'missing'.
|
303
|
+
#
|
304
|
+
# == Usage
|
305
|
+
#
|
306
|
+
# v = Daru::Vector.new [1,2,3,4,5]
|
307
|
+
# v.missing_values = [3]
|
308
|
+
# v.update
|
309
|
+
# v.missing_positions
|
310
|
+
# #=> [2]
|
311
|
+
def missing_values= values
|
312
|
+
set_missing_values values
|
313
|
+
set_missing_positions unless Daru.lazy_update
|
314
|
+
end
|
315
|
+
|
316
|
+
# Method for updating the metadata (i.e. missing value positions) of the
|
317
|
+
# vector after assignment/deletion etc. are complete. This is provided so that
|
318
|
+
# time is not wasted in creating the metadata for the vector each time
|
319
|
+
# assignment/deletion of elements is done. Updating data this way is called
|
320
|
+
# lazy loading. To set or unset lazy loading, see the .lazy_update= method.
|
321
|
+
def update
|
322
|
+
if Daru.lazy_update
|
323
|
+
set_missing_positions
|
324
|
+
end
|
186
325
|
end
|
187
326
|
|
188
327
|
# Two vectors are equal if they have the exact same index values corresponding
|
@@ -199,21 +338,20 @@ module Daru
|
|
199
338
|
end
|
200
339
|
end
|
201
340
|
|
202
|
-
def
|
203
|
-
|
341
|
+
def head q=10
|
342
|
+
self[0..(q-1)]
|
204
343
|
end
|
205
344
|
|
206
|
-
def
|
207
|
-
|
345
|
+
# Returns the last +q+ elements (default 10) via #[] with a positional range.
def tail q=10
  # The last q positions are (@size - q)..(@size - 1). Clamp the start at 0
  # so that asking for more elements than exist yields everything instead of
  # a negative start.
  start = [@size - q, 0].max
  self[start..(@size - 1)]
end
|
209
348
|
|
210
|
-
|
211
|
-
|
349
|
+
# Reports whether missing data is present in the Vector.
|
350
|
+
def has_missing_data?
|
351
|
+
!missing_positions.empty?
|
212
352
|
end
|
353
|
+
alias :flawed? :has_missing_data?
|
213
354
|
|
214
|
-
def tail q=10
|
215
|
-
self[-q..-1]
|
216
|
-
end
|
217
355
|
|
218
356
|
# Append an element to the vector by specifying the element and index
|
219
357
|
def concat element, index=nil
|
@@ -231,8 +369,10 @@ module Daru
|
|
231
369
|
end
|
232
370
|
@data[@index[index]] = element
|
233
371
|
set_size
|
234
|
-
|
372
|
+
set_missing_positions unless Daru.lazy_update
|
235
373
|
end
|
374
|
+
alias :push :concat
|
375
|
+
alias :<< :concat
|
236
376
|
|
237
377
|
# Cast a vector to a new data type.
|
238
378
|
#
|
@@ -240,11 +380,11 @@ module Daru
|
|
240
380
|
#
|
241
381
|
# * +:dtype+ - :array for Ruby Array. :nmatrix for NMatrix.
|
242
382
|
def cast opts={}
|
243
|
-
|
383
|
+
dt = opts[:dtype]
|
244
384
|
raise ArgumentError, "Unsupported dtype #{opts[:dtype]}" unless
|
245
|
-
|
385
|
+
dt == :array or dt == :nmatrix or dt == :gsl
|
246
386
|
|
247
|
-
@data = cast_vector_to dtype
|
387
|
+
@data = cast_vector_to dt unless @dtype == dt
|
248
388
|
end
|
249
389
|
|
250
390
|
# Delete an element by value
|
@@ -264,7 +404,7 @@ module Daru
|
|
264
404
|
end
|
265
405
|
|
266
406
|
set_size
|
267
|
-
|
407
|
+
set_missing_positions unless Daru.lazy_update
|
268
408
|
end
|
269
409
|
|
270
410
|
# The type of data contained in the vector. Can be :object or :numeric. If
|
@@ -308,11 +448,19 @@ module Daru
|
|
308
448
|
Daru::Vector.new uniq_vector, name: @name, index: new_index, dtype: @dtype
|
309
449
|
end
|
310
450
|
|
451
|
+
def any? &block
|
452
|
+
@data.data.any?(&block)
|
453
|
+
end
|
454
|
+
|
455
|
+
def all? &block
|
456
|
+
@data.data.all?(&block)
|
457
|
+
end
|
458
|
+
|
311
459
|
# Sorts a vector according to its values. If a block is specified, the contents
|
312
|
-
#
|
313
|
-
#
|
314
|
-
#
|
315
|
-
#
|
460
|
+
# will be evaluated and data will be swapped whenever the block evaluates
|
461
|
+
# to *true*. Defaults to ascending order sorting. Any missing values will be
|
462
|
+
# put at the end of the vector. Preserves indexing. Default sort algorithm is
|
463
|
+
# quick sort.
|
316
464
|
#
|
317
465
|
# == Options
|
318
466
|
#
|
@@ -323,7 +471,7 @@ module Daru
|
|
323
471
|
#
|
324
472
|
# v = Daru::Vector.new ["My first guitar", "jazz", "guitar"]
|
325
473
|
# # Say you want to sort these strings by length.
|
326
|
-
# v.sort { |a,b| a.length <=> b.length }
|
474
|
+
# v.sort(ascending: false) { |a,b| a.length <=> b.length }
|
327
475
|
def sort opts={}, &block
|
328
476
|
opts = {
|
329
477
|
ascending: true,
|
@@ -339,18 +487,138 @@ module Daru
|
|
339
487
|
Daru::Vector.new(vector, index: create_index(index), name: @name, dtype: @dtype)
|
340
488
|
end
|
341
489
|
|
342
|
-
# Just sort the data and get an Array in return using Enumerable#sort.
|
490
|
+
# Just sort the data and get an Array in return using Enumerable#sort.
|
491
|
+
# Non-destructive.
|
343
492
|
def sorted_data &block
|
344
493
|
@data.to_a.sort(&block)
|
345
494
|
end
|
346
495
|
|
347
|
-
# Returns *true* if the value passed actually exists
|
496
|
+
# Returns *true* if the value passed actually exists or is not marked as
|
497
|
+
# a *missing value*.
|
348
498
|
def exists? value
|
349
|
-
|
499
|
+
!@missing_values.has_key?(self[index_of(value)])
|
500
|
+
end
|
501
|
+
|
502
|
+
# Like map, but returns a Daru::Vector with the returned values.
|
503
|
+
def recode dt=nil, &block
|
504
|
+
return to_enum(:recode) unless block_given?
|
505
|
+
|
506
|
+
dup.recode! dt, &block
|
507
|
+
end
|
508
|
+
|
509
|
+
# Destructive version of #recode.
|
510
|
+
def recode! dt=nil, &block
|
511
|
+
return to_enum(:recode!) unless block_given?
|
512
|
+
|
513
|
+
@data.map!(&block).data
|
514
|
+
@data = cast_vector_to(dt || @dtype)
|
515
|
+
self
|
516
|
+
end
|
517
|
+
|
518
|
+
# Deletes, in place, every element for which the block returns a truthy
# value, keeping the index entries of the surviving elements. Returns self,
# or an Enumerator when no block is given.
def delete_if &block
  return to_enum(:delete_if) unless block_given?

  keep_e = []
  keep_i = []
  each_with_index do |n, i|
    # Survivors are the elements the block does NOT select for deletion.
    unless yield(n)
      keep_e << n
      keep_i << i
    end
  end

  @data = cast_vector_to @dtype, keep_e
  @index = @index.is_a?(MultiIndex) ? MultiIndex.new(keep_i) : Index.new(keep_i)
  set_missing_positions unless Daru.lazy_update
  set_size

  self
end
|
537
|
+
|
538
|
+
# Reports all values that don't comply with a condition.
|
539
|
+
# Returns a hash with the index of data and the invalid data.
|
540
|
+
def verify &block
|
541
|
+
h = {}
|
542
|
+
(0...size).each do |i|
|
543
|
+
if !(yield @data[i])
|
544
|
+
h[i] = @data[i]
|
545
|
+
end
|
546
|
+
end
|
547
|
+
|
548
|
+
h
|
549
|
+
end
|
550
|
+
|
551
|
+
# Return an Array with the data split by a separator.
|
552
|
+
# a=Daru::Vector.new(["a,b","c,d","a,b","d"])
|
553
|
+
# a.splitted
|
554
|
+
# =>
|
555
|
+
# [["a","b"],["c","d"],["a","b"],["d"]]
|
556
|
+
def splitted sep=","
|
557
|
+
@data.map do |s|
|
558
|
+
if s.nil?
|
559
|
+
nil
|
560
|
+
elsif s.respond_to? :split
|
561
|
+
s.split sep
|
562
|
+
else
|
563
|
+
[s]
|
564
|
+
end
|
565
|
+
end
|
566
|
+
end
|
567
|
+
|
568
|
+
# Returns a hash of Vectors, defined by the different values
|
569
|
+
# defined on the fields
|
570
|
+
# Example:
|
571
|
+
#
|
572
|
+
# a=Daru::Vector.new(["a,b","c,d","a,b"])
|
573
|
+
# a.split_by_separator
|
574
|
+
# => {"a"=>#<Daru::Vector:0x7f2dbcc09d88
|
575
|
+
# @data=[1, 0, 1]>,
|
576
|
+
# "b"=>#<Daru::Vector:0x7f2dbcc09c48
|
577
|
+
# @data=[1, 0, 1]>,
|
578
|
+
# "c"=>#<Daru::Vector:0x7f2dbcc09b08
|
579
|
+
# @data=[0, 1, 0]>,
# "d"=>#<Daru::Vector:0x7f2dbcc099c8
# @data=[0, 1, 0]>}
|
580
|
+
#
|
581
|
+
def split_by_separator sep=","
|
582
|
+
split_data = splitted sep
|
583
|
+
factors = split_data.flatten.uniq.compact
|
584
|
+
|
585
|
+
out = factors.inject({}) do |h,x|
|
586
|
+
h[x] = []
|
587
|
+
h
|
588
|
+
end
|
589
|
+
|
590
|
+
split_data.each do |r|
|
591
|
+
if r.nil?
|
592
|
+
factors.each do |f|
|
593
|
+
out[f].push(nil)
|
594
|
+
end
|
595
|
+
else
|
596
|
+
factors.each do |f|
|
597
|
+
out[f].push(r.include?(f) ? 1:0)
|
598
|
+
end
|
599
|
+
end
|
600
|
+
end
|
601
|
+
|
602
|
+
out.inject({}) do |s,v|
|
603
|
+
s[v[0]] = Daru::Vector.new v[1]
|
604
|
+
s
|
605
|
+
end
|
606
|
+
end
|
607
|
+
|
608
|
+
# Returns a Hash mapping each factor produced by #split_by_separator to the
# number of elements that contain it.
def split_by_separator_freq(sep=",")
  split_by_separator(sep).each_with_object({}) do |(factor, flags), freqs|
    # Seed the sum with 0: without a seed the first flag is used as-is, and
    # a leading nil (missing row) would fail on nil + Integer.
    freqs[factor] = flags.inject(0) { |sum, flag| sum + flag.to_i }
  end
end
|
614
|
+
|
615
|
+
def reset_index!
|
616
|
+
@index = Daru::Index.new(Array.new(size) { |i| i })
|
617
|
+
self
|
350
618
|
end
|
351
619
|
|
352
620
|
# Returns a vector which has *true* in the position where the element in self
|
353
|
-
#
|
621
|
+
# is nil, and false otherwise.
|
354
622
|
#
|
355
623
|
# == Usage
|
356
624
|
#
|
@@ -383,26 +651,34 @@ module Daru
|
|
383
651
|
end
|
384
652
|
|
385
653
|
# Replace all nils in the vector with the value passed as an argument. Destructive.
|
386
|
-
#
|
654
|
+
# See #replace_nils for non-destructive version
|
387
655
|
#
|
388
656
|
# == Arguments
|
389
657
|
#
|
390
658
|
# * +replacement+ - The value which should replace all nils
|
391
659
|
def replace_nils! replacement
|
392
|
-
|
660
|
+
missing_positions.each do |idx|
|
393
661
|
self[idx] = replacement
|
394
662
|
end
|
395
663
|
|
396
664
|
self
|
397
665
|
end
|
398
666
|
|
667
|
+
def detach_index
|
668
|
+
Daru::DataFrame.new({
|
669
|
+
index: @index.to_a.map(&:to_s),
|
670
|
+
vector: @data.to_a
|
671
|
+
})
|
672
|
+
end
|
673
|
+
|
399
674
|
# Non-destructive version of #replace_nils!
|
400
675
|
def replace_nils replacement
|
401
676
|
self.dup.replace_nils!(replacement)
|
402
677
|
end
|
403
678
|
|
679
|
+
# number of non-missing elements
|
404
680
|
def n_valid
|
405
|
-
@size
|
681
|
+
@size - missing_positions.size
|
406
682
|
end
|
407
683
|
|
408
684
|
# Returns *true* if an index exists
|
@@ -425,6 +701,20 @@ module Daru
|
|
425
701
|
end
|
426
702
|
end
|
427
703
|
|
704
|
+
# If dtype != gsl, will convert data to GSL::Vector with to_a. Otherwise returns
|
705
|
+
# the stored GSL::Vector object.
|
706
|
+
def to_gsl
|
707
|
+
if Daru.has_gsl?
|
708
|
+
if dtype == :gsl
|
709
|
+
return @data.data
|
710
|
+
else
|
711
|
+
GSL::Vector.alloc only_valid(:array).to_a
|
712
|
+
end
|
713
|
+
else
|
714
|
+
raise NoMethodError, "Install gsl-nmatrix for access to this functionality."
|
715
|
+
end
|
716
|
+
end
|
717
|
+
|
428
718
|
# Convert to hash. Hash keys are indexes and values are the corresponding elements
|
429
719
|
def to_hash
|
430
720
|
@index.inject({}) do |hsh, index|
|
@@ -446,12 +736,24 @@ module Daru
|
|
446
736
|
# Convert to html for iruby
|
447
737
|
def to_html threshold=30
|
448
738
|
name = @name || 'nil'
|
449
|
-
html =
|
739
|
+
html = "<table>" +
|
740
|
+
"<tr>" +
|
741
|
+
"<th colspan=\"2\">" +
|
742
|
+
"Daru::Vector:#{self.object_id} " + " size: #{size}" +
|
743
|
+
"</th>" +
|
744
|
+
"</tr>"
|
745
|
+
html += '<tr><th> </th><th>' + name.to_s + '</th></tr>'
|
450
746
|
@index.each_with_index do |index, num|
|
451
747
|
html += '<tr><td>' + index.to_s + '</td>' + '<td>' + self[index].to_s + '</td></tr>'
|
452
748
|
|
453
749
|
if num > threshold
|
454
750
|
html += '<tr><td>...</td><td>...</td></tr>'
|
751
|
+
|
752
|
+
last_index = @index.to_a.last
|
753
|
+
html += '<tr>' +
|
754
|
+
'<td>' + last_index.to_s + '</td>' +
|
755
|
+
'<td>' + self[last_index].to_s + '</td>' +
|
756
|
+
'</tr>'
|
455
757
|
break
|
456
758
|
end
|
457
759
|
end
|
@@ -464,11 +766,45 @@ module Daru
|
|
464
766
|
to_html
|
465
767
|
end
|
466
768
|
|
769
|
+
# Create a summary of the Vector using Report Builder.
|
770
|
+
def summary(method = :to_text)
|
771
|
+
ReportBuilder.new(no_title: true).add(self).send(method)
|
772
|
+
end
|
773
|
+
|
774
|
+
# Adds a summary section for this vector to the report builder +b+:
# element counts always, factor/mode/distribution details when @type is
# :object, and median/mean (plus dispersion figures when sd is available)
# when @type is :numeric.
def report_building b
  b.section(:name => name) do |s|
    s.text "n :#{size}"
    s.text "n valid:#{n_valid}"
    if @type == :object
      s.text "factors: #{factors.to_a.join(',')}"
      s.text "mode: #{mode}"

      s.table(:name => "Distribution") do |t|
        frequencies.sort_by { |a| a.to_s }.each do |k,v|
          key = @index.include?(k) ? @index[k] : k
          t.row [key, v , ("%0.2f%%" % (v.quo(n_valid)*100))]
        end
      end
    end

    if @type == :numeric
      s.text "median: #{median.to_s}"
      s.text "mean: %0.4f" % mean
      # sd may be unavailable; skip every figure derived from it in that case.
      if sd
        s.text "std.dev.: %0.4f" % sd
        s.text "std.err.: %0.4f" % se
        s.text "skew: %0.4f" % skew
        s.text "kurtosis: %0.4f" % kurtosis
      end
    end
  end
end
|
802
|
+
|
467
803
|
# Over rides original inspect for pretty printing in irb
|
468
804
|
def inspect spacing=20, threshold=15
|
469
805
|
longest = [@name.to_s.size,
|
470
|
-
@index.to_a.map(&:to_s).map(&:size).max,
|
471
|
-
@data .map(&:to_s).map(&:size).max,
|
806
|
+
(@index.to_a.map(&:to_s).map(&:size).max || 0),
|
807
|
+
(@data .map(&:to_s).map(&:size).max || 0),
|
472
808
|
'nil'.size].max
|
473
809
|
|
474
810
|
content = ""
|
@@ -503,6 +839,11 @@ module Daru
|
|
503
839
|
#
|
504
840
|
# @param new_name [Symbol] The new name.
|
505
841
|
def rename new_name
|
842
|
+
if new_name.is_a?(Numeric)
|
843
|
+
@name = new_name
|
844
|
+
return
|
845
|
+
end
|
846
|
+
|
506
847
|
@name = new_name.to_sym
|
507
848
|
end
|
508
849
|
|
@@ -511,12 +852,176 @@ module Daru
|
|
511
852
|
Daru::Vector.new @data.dup, name: @name, index: @index.dup
|
512
853
|
end
|
513
854
|
|
855
|
+
# == Bootstrap
|
856
|
+
# Generate +nr+ resamples (with replacement) of size +s+
|
857
|
+
# from vector, computing each estimate from +estimators+
|
858
|
+
# over each resample.
|
859
|
+
# +estimators+ could be
|
860
|
+
# a) Hash with variable names as keys and lambdas as values
|
861
|
+
# a.bootstrap(:log_s2=>lambda {|v| Math.log(v.variance)},1000)
|
862
|
+
# b) Array with names of method to bootstrap
|
863
|
+
# a.bootstrap([:mean, :sd],1000)
|
864
|
+
# c) A single method to bootstrap
|
865
|
+
# a.bootstrap(:mean, 1000)
|
866
|
+
# If +s+ is nil, it is set to the vector size by default.
|
867
|
+
#
|
868
|
+
# Returns a DataFrame where each vector is a vector
|
869
|
+
# of length +nr+ containing the computed resample estimates.
|
870
|
+
def bootstrap(estimators, nr, s=nil)
|
871
|
+
s ||= size
|
872
|
+
h_est, es, bss = prepare_bootstrap(estimators)
|
873
|
+
|
874
|
+
nr.times do |i|
|
875
|
+
bs = sample_with_replacement(s)
|
876
|
+
es.each do |estimator|
|
877
|
+
bss[estimator].push(h_est[estimator].call(bs))
|
878
|
+
end
|
879
|
+
end
|
880
|
+
|
881
|
+
es.each do |est|
|
882
|
+
bss[est] = Daru::Vector.new bss[est]
|
883
|
+
end
|
884
|
+
|
885
|
+
Daru::DataFrame.new bss
|
886
|
+
end
|
887
|
+
|
888
|
+
# == Jackknife
|
889
|
+
# Returns a dataset with jackknife delete-+k+ +estimators+
|
890
|
+
# +estimators+ could be:
|
891
|
+
# a) Hash with variable names as keys and lambdas as values
|
892
|
+
# a.jackknife(:log_s2=>lambda {|v| Math.log(v.variance)})
|
893
|
+
# b) Array with method names to jackknife
|
894
|
+
# a.jackknife([:mean, :sd])
|
895
|
+
# c) A single method to jackknife
|
896
|
+
# a.jackknife(:mean)
|
897
|
+
# +k+ represents the block size for block jackknife. By default
|
898
|
+
# it is set to 1, for classic delete-one jackknife.
|
899
|
+
#
|
900
|
+
# Returns a dataset where each vector is a vector
|
901
|
+
# of length +cases+/+k+ containing the computed jackknife estimates.
|
902
|
+
#
|
903
|
+
# == Reference:
|
904
|
+
# * Sawyer, S. (2005). Resampling Data: Using a Statistical Jackknife.
|
905
|
+
def jackknife(estimators, k=1)
|
906
|
+
raise "n should be divisible by k:#{k}" unless size % k==0
|
907
|
+
|
908
|
+
nb = (size / k).to_i
|
909
|
+
h_est, es, ps = prepare_bootstrap(estimators)
|
910
|
+
|
911
|
+
est_n = es.inject({}) do |h,v|
|
912
|
+
h[v] = h_est[v].call(self)
|
913
|
+
h
|
914
|
+
end
|
915
|
+
|
916
|
+
nb.times do |i|
|
917
|
+
other = @data.dup
|
918
|
+
other.slice!(i*k, k)
|
919
|
+
other = Daru::Vector.new other
|
920
|
+
|
921
|
+
es.each do |estimator|
|
922
|
+
# Add pseudovalue
|
923
|
+
ps[estimator].push(
|
924
|
+
nb * est_n[estimator] - (nb-1) * h_est[estimator].call(other))
|
925
|
+
end
|
926
|
+
end
|
927
|
+
|
928
|
+
es.each do |est|
|
929
|
+
ps[est] = Daru::Vector.new ps[est]
|
930
|
+
end
|
931
|
+
Daru::DataFrame.new ps
|
932
|
+
end
|
933
|
+
|
934
|
+
# Creates a new vector consisting only of non-nil data
|
935
|
+
#
|
936
|
+
# == Arguments
|
937
|
+
#
|
938
|
+
# @as_a [Symbol] Passing :array will return only the elements
|
939
|
+
# as an Array. Otherwise will return a Daru::Vector.
|
940
|
+
#
|
941
|
+
# @duplicate [Boolean] In case no missing data is found in the
|
942
|
+
# vector, setting this to false will return the same vector.
|
943
|
+
# Otherwise, a duplicate will be returned irrespective of
|
944
|
+
# presence of missing data.
|
945
|
+
def only_valid as_a=:vector, duplicate=true
|
946
|
+
return self.dup if !has_missing_data? and as_a == :vector and duplicate
|
947
|
+
return self if !has_missing_data? and as_a == :vector and !duplicate
|
948
|
+
return self.to_a if !has_missing_data? and as_a != :vector
|
949
|
+
|
950
|
+
new_index = @index.to_a - missing_positions
|
951
|
+
new_vector = new_index.map do |idx|
|
952
|
+
self[idx]
|
953
|
+
end
|
954
|
+
|
955
|
+
return new_vector if as_a != :vector
|
956
|
+
|
957
|
+
Daru::Vector.new new_vector, index: new_index, name: @name, dtype: dtype
|
958
|
+
end
|
959
|
+
|
960
|
+
# Returns a Vector containing only missing data (preserves indexes).
|
961
|
+
def only_missing as_a=:vector
|
962
|
+
if as_a == :vector
|
963
|
+
self[*missing_positions]
|
964
|
+
elsif as_a == :array
|
965
|
+
self[*missing_positions].to_a
|
966
|
+
end
|
967
|
+
end
|
968
|
+
|
969
|
+
# Returns a Vector with only numerical data. Missing data is included
|
970
|
+
# but non-Numeric objects are excluded. Preserves index.
|
971
|
+
def only_numerics
|
972
|
+
numeric_indexes = []
|
973
|
+
|
974
|
+
each_with_index do |v, i|
|
975
|
+
numeric_indexes << i if(v.kind_of?(Numeric) or @missing_values.has_key?(v))
|
976
|
+
end
|
977
|
+
|
978
|
+
self[*numeric_indexes]
|
979
|
+
end
|
980
|
+
|
981
|
+
# Returns the database type for the vector, according to its content
|
982
|
+
def db_type(dbs=:mysql)
|
983
|
+
# first, look for date-like strings, then for any non-numeric character
|
984
|
+
if @data.find {|v| v.to_s=~/\d{2,2}-\d{2,2}-\d{4,4}/} or @data.find {|v| v.to_s=~/\d{4,4}-\d{2,2}-\d{2,2}/}
|
985
|
+
return "DATE"
|
986
|
+
elsif @data.find {|v| v.to_s=~/[^0-9e.-]/ }
|
987
|
+
return "VARCHAR (255)"
|
988
|
+
elsif @data.find {|v| v.to_s=~/\./}
|
989
|
+
return "DOUBLE"
|
990
|
+
else
|
991
|
+
return "INTEGER"
|
992
|
+
end
|
993
|
+
end
|
994
|
+
|
514
995
|
# Copies the structure of the vector (i.e the index, size, etc.) and fills all
|
515
|
-
#
|
996
|
+
# values with nils.
|
516
997
|
def clone_structure
|
517
998
|
Daru::Vector.new(([nil]*@size), name: @name, index: @index.dup)
|
518
999
|
end
|
519
1000
|
|
1001
|
+
# Save the vector to a file
|
1002
|
+
#
|
1003
|
+
# == Arguments
|
1004
|
+
#
|
1005
|
+
# * filename - Path of file where the vector is to be saved
|
1006
|
+
def save filename
|
1007
|
+
Daru::IO.save self, filename
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
def _dump(depth) # :nodoc:
|
1011
|
+
Marshal.dump({
|
1012
|
+
data: @data.to_a,
|
1013
|
+
dtype: @dtype,
|
1014
|
+
name: @name,
|
1015
|
+
index: @index,
|
1016
|
+
missing_values: @missing_values})
|
1017
|
+
end
|
1018
|
+
|
1019
|
+
def self._load(data) # :nodoc:
|
1020
|
+
h = Marshal.load(data)
|
1021
|
+
Daru::Vector.new(h[:data], index: h[:index],
|
1022
|
+
name: h[:name], dtype: h[:dtype], missing_values: h[:missing_values])
|
1023
|
+
end
|
1024
|
+
|
520
1025
|
def daru_vector *name
|
521
1026
|
self
|
522
1027
|
end
|
@@ -535,6 +1040,26 @@ module Daru
|
|
535
1040
|
|
536
1041
|
private
|
537
1042
|
|
1043
|
+
# For an array or hash of estimators methods, returns
|
1044
|
+
# an array with three elements
|
1045
|
+
# 1.- A hash with estimators names as keys and lambdas as values
|
1046
|
+
# 2.- An array with estimators names
|
1047
|
+
# 3.- A Hash with estimators names as keys and empty arrays as values
|
1048
|
+
def prepare_bootstrap(estimators)
|
1049
|
+
h_est = estimators
|
1050
|
+
h_est = [h_est] unless h_est.is_a?(Array) or h_est.is_a?(Hash)
|
1051
|
+
|
1052
|
+
if h_est.is_a? Array
|
1053
|
+
h_est = h_est.inject({}) do |h, est|
|
1054
|
+
h[est] = lambda { |v| Daru::Vector.new(v).send(est) }
|
1055
|
+
h
|
1056
|
+
end
|
1057
|
+
end
|
1058
|
+
bss = h_est.keys.inject({}) { |h,v| h[v] = []; h }
|
1059
|
+
|
1060
|
+
[h_est, h_est.keys, bss]
|
1061
|
+
end
|
1062
|
+
|
538
1063
|
def quick_sort vector, index, order, &block
|
539
1064
|
recursive_quick_sort vector, index, order, 0, @size-1, &block
|
540
1065
|
[vector, index]
|
@@ -599,18 +1124,17 @@ module Daru
|
|
599
1124
|
end
|
600
1125
|
|
601
1126
|
# Note: To maintain sanity, this _MUST_ be the _ONLY_ place in daru where the
|
602
|
-
#
|
1127
|
+
# @dtype variable is set and the underlying data type of vector changed.
|
603
1128
|
def cast_vector_to dtype, source=nil, nm_dtype=nil
|
604
|
-
source = @data if source.nil?
|
605
|
-
return @data if @dtype and @dtype == dtype
|
1129
|
+
source = @data.to_a if source.nil?
|
606
1130
|
|
607
1131
|
new_vector =
|
608
1132
|
case dtype
|
609
|
-
when :array then Daru::Accessors::ArrayWrapper.new(source
|
610
|
-
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source
|
611
|
-
|
1133
|
+
when :array then Daru::Accessors::ArrayWrapper.new(source, self)
|
1134
|
+
when :nmatrix then Daru::Accessors::NMatrixWrapper.new(source, self, nm_dtype)
|
1135
|
+
when :gsl then Daru::Accessors::GSLWrapper.new(source, self)
|
612
1136
|
when :mdarray then raise NotImplementedError, "MDArray not yet supported."
|
613
|
-
else
|
1137
|
+
else raise "Unknown dtype #{dtype}"
|
614
1138
|
end
|
615
1139
|
|
616
1140
|
@dtype = dtype || :array
|
@@ -649,12 +1173,11 @@ module Daru
|
|
649
1173
|
end
|
650
1174
|
end
|
651
1175
|
|
652
|
-
def
|
653
|
-
@
|
1176
|
+
def set_missing_positions
|
1177
|
+
@missing_positions = []
|
654
1178
|
@index.each do |e|
|
655
|
-
@
|
1179
|
+
@missing_positions << e if (@missing_values.has_key?(self[e]))
|
656
1180
|
end
|
657
|
-
@nil_positions.uniq!
|
658
1181
|
end
|
659
1182
|
|
660
1183
|
def create_index potential_index
|
@@ -669,5 +1192,17 @@ module Daru
|
|
669
1192
|
pos = index_for location
|
670
1193
|
pos ? @data[pos] : nil
|
671
1194
|
end
|
1195
|
+
|
1196
|
+
# Setup missing_values. The missing_values instance variable is set
|
1197
|
+
# as a Hash for faster lookup times.
|
1198
|
+
def set_missing_values values_arry
|
1199
|
+
@missing_values = {}
|
1200
|
+
@missing_values[nil] = 0
|
1201
|
+
if values_arry
|
1202
|
+
values_arry.each do |e|
|
1203
|
+
@missing_values[e] = 0
|
1204
|
+
end
|
1205
|
+
end
|
1206
|
+
end
|
672
1207
|
end
|
673
1208
|
end
|