daru 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/io/io.rb
CHANGED
@@ -5,12 +5,10 @@ module Daru
|
|
5
5
|
row.to_a.map do |c|
|
6
6
|
if empty.include?(c)
|
7
7
|
nil
|
8
|
+
elsif c.is_a?(String) && c.is_number?
|
9
|
+
c =~ /^\d+$/ ? c.to_i : c.tr(',','.').to_f
|
8
10
|
else
|
9
|
-
|
10
|
-
c =~ /^\d+$/ ? c.to_i : c.gsub(",",".").to_f
|
11
|
-
else
|
12
|
-
c
|
13
|
-
end
|
11
|
+
c
|
14
12
|
end
|
15
13
|
end
|
16
14
|
end
|
@@ -23,7 +21,7 @@ module Daru
|
|
23
21
|
|
24
22
|
def from_excel path, opts={}
|
25
23
|
opts = {
|
26
|
-
:
|
24
|
+
worksheet_id: 0
|
27
25
|
}.merge opts
|
28
26
|
|
29
27
|
worksheet_id = opts[:worksheet_id]
|
@@ -41,10 +39,10 @@ module Daru
|
|
41
39
|
df
|
42
40
|
end
|
43
41
|
|
44
|
-
def dataframe_write_excel dataframe, path,
|
42
|
+
def dataframe_write_excel dataframe, path, _opts={}
|
45
43
|
book = Spreadsheet::Workbook.new
|
46
44
|
sheet = book.create_worksheet
|
47
|
-
format = Spreadsheet::Format.new :
|
45
|
+
format = Spreadsheet::Format.new color: :blue, weight: :bold
|
48
46
|
|
49
47
|
sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
|
50
48
|
sheet.row(0).default_format = format
|
@@ -62,18 +60,26 @@ module Daru
|
|
62
60
|
opts[:col_sep] ||= ','
|
63
61
|
opts[:converters] ||= :numeric
|
64
62
|
|
65
|
-
daru_options = opts.keys.
|
63
|
+
daru_options = opts.keys.each_with_object({}) do |hash, k|
|
66
64
|
if [:clone, :order, :index, :name].include?(k)
|
67
65
|
hash[k] = opts[k]
|
68
66
|
opts.delete k
|
69
67
|
end
|
70
|
-
|
71
|
-
hash
|
72
68
|
end
|
73
69
|
|
74
|
-
# Preprocess headers for detecting and correcting repetition in
|
70
|
+
# Preprocess headers for detecting and correcting repetition in
|
75
71
|
# case the :headers option is not specified.
|
76
|
-
|
72
|
+
if opts[:headers]
|
73
|
+
opts[:header_converters] ||= :symbol
|
74
|
+
|
75
|
+
csv = ::CSV.read(path, 'rb',opts)
|
76
|
+
yield csv if block_given?
|
77
|
+
|
78
|
+
hsh = {}
|
79
|
+
csv.by_col.each do |col_name, values|
|
80
|
+
hsh[col_name] = values
|
81
|
+
end
|
82
|
+
else
|
77
83
|
csv = ::CSV.open(path, 'rb', opts)
|
78
84
|
yield csv if block_given?
|
79
85
|
|
@@ -86,16 +92,9 @@ module Daru
|
|
86
92
|
headers.each_with_index do |h, i|
|
87
93
|
hsh[h] = csv_as_arrays[i]
|
88
94
|
end
|
89
|
-
else
|
90
|
-
opts[:header_converters] ||= :symbol
|
91
|
-
|
92
|
-
csv = ::CSV.read(path, 'rb',opts)
|
93
|
-
yield csv if block_given?
|
94
95
|
|
95
|
-
|
96
|
-
|
97
|
-
hsh[col_name] = values
|
98
|
-
end
|
96
|
+
# Order columns as given in CSV
|
97
|
+
daru_options[:order] = headers.to_a
|
99
98
|
end
|
100
99
|
|
101
100
|
Daru::DataFrame.new(hsh,daru_options)
|
@@ -107,14 +106,14 @@ module Daru
|
|
107
106
|
}.merge(opts)
|
108
107
|
|
109
108
|
writer = ::CSV.open(path, 'w', options)
|
110
|
-
writer << dataframe.vectors.to_a
|
109
|
+
writer << dataframe.vectors.to_a unless options[:headers] == false
|
111
110
|
|
112
111
|
dataframe.each_row do |row|
|
113
|
-
if options[:convert_comma]
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
112
|
+
writer << if options[:convert_comma]
|
113
|
+
row.map { |v| v.to_s.tr('.', ',') }
|
114
|
+
else
|
115
|
+
row.to_a
|
116
|
+
end
|
118
117
|
end
|
119
118
|
|
120
119
|
writer.close
|
@@ -134,10 +133,10 @@ module Daru
|
|
134
133
|
|
135
134
|
def dataframe_write_sql ds, dbh, table
|
136
135
|
require 'dbi'
|
137
|
-
query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(
|
138
|
-
sth =
|
136
|
+
query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')'
|
137
|
+
sth = dbh.prepare(query)
|
139
138
|
ds.each_row { |c| sth.execute(*c.to_a) }
|
140
|
-
|
139
|
+
true
|
141
140
|
end
|
142
141
|
|
143
142
|
# Load dataframe from AR::Relation
|
@@ -158,7 +157,7 @@ module Daru
|
|
158
157
|
vectors = Hash[*fields.map { |name|
|
159
158
|
[
|
160
159
|
name,
|
161
|
-
Daru::Vector.new([]).tap {|v| v.rename name }
|
160
|
+
Daru::Vector.new([]).tap { |v| v.rename name }
|
162
161
|
]
|
163
162
|
}.flatten]
|
164
163
|
|
@@ -174,9 +173,9 @@ module Daru
|
|
174
173
|
|
175
174
|
def from_plaintext filename, fields
|
176
175
|
ds = Daru::DataFrame.new({}, order: fields)
|
177
|
-
fp = File.open(filename,
|
176
|
+
fp = File.open(filename,'r')
|
178
177
|
fp.each_line do |line|
|
179
|
-
row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[
|
178
|
+
row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[''])
|
180
179
|
next if row == ["\x1A"]
|
181
180
|
ds.add_row(row)
|
182
181
|
end
|
@@ -9,15 +9,15 @@ module Daru
|
|
9
9
|
@query = query
|
10
10
|
end
|
11
11
|
|
12
|
-
def each_column_name
|
12
|
+
def each_column_name
|
13
13
|
result.column_names.each do |column_name|
|
14
|
-
|
14
|
+
yield(column_name.to_sym)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
def each_row
|
18
|
+
def each_row
|
19
19
|
result.fetch do |row|
|
20
|
-
|
20
|
+
yield(row.to_a)
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -36,15 +36,15 @@ module Daru
|
|
36
36
|
@query = query
|
37
37
|
end
|
38
38
|
|
39
|
-
def each_column_name
|
39
|
+
def each_column_name
|
40
40
|
result.columns.each do |column_name|
|
41
|
-
|
41
|
+
yield(column_name.to_sym)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
def each_row
|
45
|
+
def each_row
|
46
46
|
result.each do |row|
|
47
|
-
|
47
|
+
yield(row.values)
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
@@ -59,7 +59,7 @@ module Daru
|
|
59
59
|
private_constant :ActiveRecordConnectionAdapter
|
60
60
|
|
61
61
|
def self.make_dataframe(db, query)
|
62
|
-
|
62
|
+
new(db, query).make_dataframe
|
63
63
|
end
|
64
64
|
|
65
65
|
def initialize(db, query)
|
@@ -105,11 +105,11 @@ module Daru
|
|
105
105
|
end
|
106
106
|
|
107
107
|
def check_dbi(obj)
|
108
|
-
DBI::DatabaseHandle
|
108
|
+
obj.is_a?(DBI::DatabaseHandle)
|
109
109
|
end
|
110
110
|
|
111
111
|
def check_active_record_connection(obj)
|
112
|
-
ActiveRecord::ConnectionAdapters::AbstractAdapter
|
112
|
+
obj.is_a?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
113
113
|
end
|
114
114
|
end
|
115
115
|
end
|
@@ -2,15 +2,14 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
# Module encapsulating all arithmetic methods on DataFrame.
|
4
4
|
module Arithmetic
|
5
|
-
module DataFrame
|
6
|
-
|
5
|
+
module DataFrame
|
7
6
|
# Add a scalar or another DataFrame
|
8
7
|
def + other
|
9
8
|
binary_operation :+, other
|
10
9
|
end
|
11
10
|
|
12
11
|
# Subtract a scalar or another DataFrame.
|
13
|
-
def - other
|
12
|
+
def - other
|
14
13
|
binary_operation :-, other
|
15
14
|
end
|
16
15
|
|
@@ -19,7 +18,7 @@ module Daru
|
|
19
18
|
binary_operation :*, other
|
20
19
|
end
|
21
20
|
|
22
|
-
# Divide a scalar or another DataFrame.
|
21
|
+
# Divide a scalar or another DataFrame.
|
23
22
|
def / other
|
24
23
|
binary_operation :/, other
|
25
24
|
end
|
@@ -36,18 +35,19 @@ module Daru
|
|
36
35
|
|
37
36
|
# Calculate exponential of all vectors with numeric values.
|
38
37
|
def exp
|
39
|
-
only_numerics(clone: false).recode
|
38
|
+
only_numerics(clone: false).recode(&:exp)
|
40
39
|
end
|
41
40
|
|
42
41
|
# Calculate square root of numeric vectors.
|
43
42
|
def sqrt
|
44
|
-
only_numerics(clone: false).recode
|
43
|
+
only_numerics(clone: false).recode(&:sqrt)
|
45
44
|
end
|
46
45
|
|
47
46
|
def round precision=0
|
48
47
|
only_numerics(clone: false).recode { |v| v.round(precision) }
|
49
48
|
end
|
50
|
-
|
49
|
+
|
50
|
+
private
|
51
51
|
|
52
52
|
def binary_operation operation, other
|
53
53
|
case other
|
@@ -59,27 +59,27 @@ module Daru
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def dataframe_binary_operation operation, other
|
62
|
-
all_vectors = (
|
63
|
-
all_indexes = (
|
62
|
+
all_vectors = (vectors.to_a | other.vectors.to_a).sort
|
63
|
+
all_indexes = (index.to_a | other.index.to_a).sort
|
64
64
|
|
65
65
|
hsh = {}
|
66
66
|
all_vectors.each do |vector_name|
|
67
|
-
this =
|
67
|
+
this = has_vector?(vector_name) ? self[vector_name] : nil
|
68
68
|
that = other.has_vector?(vector_name) ? other[vector_name] : nil
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
name: vector_name)
|
75
|
-
|
70
|
+
hsh[vector_name] =
|
71
|
+
if this && that
|
72
|
+
this.send(operation, that)
|
73
|
+
else
|
74
|
+
Daru::Vector.new([], index: all_indexes, name: vector_name)
|
75
|
+
end
|
76
76
|
end
|
77
77
|
|
78
|
-
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
78
|
+
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
79
79
|
end
|
80
80
|
|
81
81
|
def scalar_binary_operation operation, other
|
82
|
-
clone =
|
82
|
+
clone = dup
|
83
83
|
clone.map_vectors! do |vector|
|
84
84
|
vector = vector.send(operation, other) if vector.type == :numeric
|
85
85
|
vector
|
@@ -90,4 +90,4 @@ module Daru
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
end
|
93
|
+
end
|
@@ -35,17 +35,17 @@ module Daru
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def abs
|
38
|
-
|
38
|
+
recode { |e| e.abs unless e.nil? }
|
39
39
|
end
|
40
40
|
|
41
41
|
def round precision=0
|
42
|
-
|
42
|
+
recode { |e| e.round(precision) unless e.nil? }
|
43
43
|
end
|
44
44
|
|
45
|
-
|
45
|
+
private
|
46
46
|
|
47
47
|
def math_unary_op operation
|
48
|
-
|
48
|
+
recode { |e| Math.send(operation, e) unless e.nil? }
|
49
49
|
end
|
50
50
|
|
51
51
|
def binary_op operation, other
|
@@ -58,8 +58,8 @@ module Daru
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def v2o_binary operation, other
|
61
|
-
Daru::Vector.new
|
62
|
-
|
61
|
+
Daru::Vector.new map { |e| e.nil? ? nil : e.send(operation, other) },
|
62
|
+
name: @name, index: @index
|
63
63
|
end
|
64
64
|
|
65
65
|
def v2v_binary operation, other
|
@@ -71,13 +71,8 @@ module Daru
|
|
71
71
|
this = self.index.include?(idx) ? self[idx] : nil
|
72
72
|
that = other.index.include?(idx) ? other[idx] : nil
|
73
73
|
|
74
|
-
|
75
|
-
|
76
|
-
common_idxs << idx
|
77
|
-
else
|
78
|
-
elements << nil
|
79
|
-
common_idxs << idx
|
80
|
-
end
|
74
|
+
elements << (this && that ? this.send(operation, that) : nil)
|
75
|
+
common_idxs << idx
|
81
76
|
end
|
82
77
|
|
83
78
|
Daru::Vector.new(elements, name: @name, index: common_idxs)
|
@@ -85,4 +80,4 @@ module Daru
|
|
85
80
|
end
|
86
81
|
end
|
87
82
|
end
|
88
|
-
end
|
83
|
+
end
|
@@ -2,45 +2,41 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
module Statistics
|
4
4
|
module DataFrame
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
#
|
21
|
-
|
22
|
-
|
5
|
+
# @!method mean
|
6
|
+
# Calculate mean of numeric vectors
|
7
|
+
# @!method variance_sample
|
8
|
+
# Calculate sample variance of numeric vectors
|
9
|
+
# @!method range
|
10
|
+
# Calculate range of numeric vectors
|
11
|
+
# @!method median
|
12
|
+
# Calculate median of numeric vectors
|
13
|
+
# @!method mode
|
14
|
+
# Calculate mode of numeric vectors
|
15
|
+
# @!method std
|
16
|
+
# Calculate sample standard deviation of numeric vectors
|
17
|
+
# @!method sum
|
18
|
+
# Calculate sum of numeric vectors
|
19
|
+
# @!method count
|
20
|
+
# Count the number of non-nil values in each vector
|
21
|
+
# @!method min
|
22
|
+
# Calculate the minimum value of each numeric vector
|
23
|
+
# @!method product
|
24
|
+
# Compute the product of each numeric vector
|
25
|
+
[:mean, :variance_sample, :range, :median, :mode, :std, :sum, :count, :min, :product].each do |meth|
|
26
|
+
define_method(meth) do
|
27
|
+
compute_stats meth
|
28
|
+
end
|
23
29
|
end
|
24
30
|
|
25
31
|
# Calculate the maximum value of each numeric vector.
|
26
32
|
def max opts={}
|
27
33
|
if opts[:vector]
|
28
|
-
|
34
|
+
row[*self[opts[:vector]].max_index.index.to_a]
|
29
35
|
else
|
30
36
|
compute_stats :max
|
31
37
|
end
|
32
38
|
end
|
33
39
|
|
34
|
-
# Calculate the minimmum value of each numeric vector.
|
35
|
-
def min
|
36
|
-
compute_stats :min
|
37
|
-
end
|
38
|
-
|
39
|
-
# Compute the product of each numeric vector.
|
40
|
-
def product
|
41
|
-
compute_stats :product
|
42
|
-
end
|
43
|
-
|
44
40
|
# @!method cumsum
|
45
41
|
# Calculate cumulative sum of each numeric Vector
|
46
42
|
# @!method standardize
|
@@ -51,7 +47,7 @@ module Daru
|
|
51
47
|
# @!method ema
|
52
48
|
# Calculate exponential moving average.
|
53
49
|
# @param [Integer] n (10) Loopback length.
|
54
|
-
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
50
|
+
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
55
51
|
# 1/n value is used for smoothing; if false, uses 2/(n+1) value.
|
56
52
|
# @!method rolling_mean
|
57
53
|
# Calculate moving averages
|
@@ -74,21 +70,22 @@ module Daru
|
|
74
70
|
# @!method rolling_variance
|
75
71
|
# Calculate moving variance
|
76
72
|
# @param [Integer] n (10) Loopback length. Default to 10.
|
77
|
-
[
|
78
|
-
|
73
|
+
[
|
74
|
+
:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
|
75
|
+
:rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
|
79
76
|
].each do |meth|
|
80
77
|
define_method(meth) do |*args|
|
81
78
|
apply_method_to_numerics meth, *args
|
82
79
|
end
|
83
80
|
end
|
84
81
|
|
85
|
-
# Create a summary of mean, standard deviation, count, max and min of
|
82
|
+
# Create a summary of mean, standard deviation, count, max and min of
|
86
83
|
# each numeric vector in the dataframe in one shot.
|
87
|
-
#
|
84
|
+
#
|
88
85
|
# == Arguments
|
89
|
-
#
|
90
|
-
# +methods+ - An array with aggregation methods specified as symbols to
|
91
|
-
# be applied to numeric vectors. Default is [:count, :mean, :std, :max,
|
86
|
+
#
|
87
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
88
|
+
# be applied to numeric vectors. Default is [:count, :mean, :std, :max,
|
92
89
|
# :min]. Methods will be applied in the specified order.
|
93
90
|
def describe methods=nil
|
94
91
|
methods ||= [:count, :mean, :std, :min, :max]
|
@@ -100,23 +97,54 @@ module Daru
|
|
100
97
|
Daru::DataFrame.new(description_hash, index: methods)
|
101
98
|
end
|
102
99
|
|
100
|
+
# The percent_change method computes the percent change over
|
101
|
+
# the given number of periods for numeric vectors.
|
102
|
+
#
|
103
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
104
|
+
#
|
105
|
+
# @example
|
106
|
+
#
|
107
|
+
# df = Daru::DataFrame.new({
|
108
|
+
# 'col0' => [1,2,3,4,5,6],
|
109
|
+
# 'col2' => ['a','b','c','d','e','f'],
|
110
|
+
# 'col1' => [11,22,33,44,55,66]
|
111
|
+
# },
|
112
|
+
# index: ['one', 'two', 'three', 'four', 'five', 'six'],
|
113
|
+
# order: ['col0', 'col1', 'col2'])
|
114
|
+
# df.percent_change
|
115
|
+
# #=>
|
116
|
+
# # <Daru::DataFrame:23513280 @rows: 6 @cols: 2>
|
117
|
+
# # col0 col1
|
118
|
+
# # one
|
119
|
+
# # two 1.0 1.0
|
120
|
+
# # three 0.5 0.5
|
121
|
+
# # four 0.3333333333333333 0.3333333333333333
|
122
|
+
# # five 0.25 0.25
|
123
|
+
# # six 0.2 0.2
|
124
|
+
def percent_change periods=1
|
125
|
+
df_numeric = only_numerics.vectors.to_a
|
126
|
+
df = Daru::DataFrame.new({}, order: @order, index: @index, name: @name)
|
127
|
+
df_numeric.each do |vec|
|
128
|
+
df[vec] = self[vec].percent_change periods
|
129
|
+
end
|
130
|
+
df
|
131
|
+
end
|
132
|
+
|
103
133
|
# Calculate sample variance-covariance between the numeric vectors.
|
104
134
|
def covariance
|
105
135
|
cache={}
|
106
|
-
vectors =
|
136
|
+
vectors = numeric_vectors
|
107
137
|
|
108
138
|
mat_rows = vectors.collect do |row|
|
109
139
|
vectors.collect do |col|
|
110
140
|
if row == col
|
111
141
|
self[row].variance
|
142
|
+
elsif cache[[col,row]].nil?
|
143
|
+
cov = vector_cov(self[row],self[col])
|
144
|
+
cache[[row,col]] = cov
|
145
|
+
cov
|
112
146
|
else
|
113
|
-
|
114
|
-
cov = vector_cov(self[row],self[col])
|
115
|
-
cache[[row,col]] = cov
|
116
|
-
cov
|
117
|
-
else
|
118
|
-
cache[[col,row]]
|
119
|
-
end
|
147
|
+
cache[[col,row]]
|
120
148
|
end
|
121
149
|
end
|
122
150
|
end
|
@@ -125,34 +153,33 @@ module Daru
|
|
125
153
|
end
|
126
154
|
|
127
155
|
alias :cov :covariance
|
128
|
-
|
156
|
+
|
129
157
|
# Calculate the correlation between the numeric vectors.
|
130
158
|
def correlation
|
131
159
|
standard_deviation = std.to_matrix
|
132
|
-
corr_arry =
|
133
|
-
|
134
|
-
|
135
|
-
standard_deviation)
|
160
|
+
corr_arry = cov
|
161
|
+
.to_matrix
|
162
|
+
.elementwise_division(standard_deviation.transpose *
|
163
|
+
standard_deviation).to_a
|
136
164
|
|
137
165
|
Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors)
|
138
166
|
end
|
139
167
|
|
140
168
|
alias :corr :correlation
|
141
169
|
|
142
|
-
|
170
|
+
private
|
143
171
|
|
144
172
|
def apply_method_to_numerics method, *args
|
145
173
|
order = []
|
146
|
-
computed = @vectors.to_a.
|
174
|
+
computed = @vectors.to_a.each_with_object([]) do |n, memo|
|
147
175
|
v = @data[@vectors[n]]
|
148
176
|
if v.type == :numeric
|
149
177
|
memo << v.send(method, *args)
|
150
178
|
order << n
|
151
179
|
end
|
152
|
-
memo
|
153
180
|
end
|
154
|
-
|
155
|
-
Daru::DataFrame.new(computed, index: @index, order: order
|
181
|
+
|
182
|
+
Daru::DataFrame.new(computed, index: @index, order: order,clone: false)
|
156
183
|
end
|
157
184
|
|
158
185
|
def vector_cov v1a, v2a
|
@@ -160,23 +187,24 @@ module Daru
|
|
160
187
|
end
|
161
188
|
|
162
189
|
def sum_of_squares v1, v2
|
163
|
-
v1a,v2a = v1.only_valid
|
190
|
+
v1a,v2a = v1.only_valid,v2.only_valid
|
164
191
|
v1a.reset_index!
|
165
|
-
v2a.reset_index!
|
192
|
+
v2a.reset_index!
|
166
193
|
m1 = v1a.mean
|
167
194
|
m2 = v2a.mean
|
168
|
-
|
195
|
+
v1a.size.times.inject(0) { |ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2) }
|
169
196
|
end
|
170
197
|
|
171
198
|
def compute_stats method
|
172
199
|
Daru::Vector.new(
|
173
|
-
numeric_vectors.
|
200
|
+
numeric_vectors.each_with_object({}) do |vec, hash|
|
174
201
|
hash[vec] = self[vec].send(method)
|
175
|
-
hash
|
176
202
|
end, name: method
|
177
203
|
)
|
178
204
|
end
|
205
|
+
alias :sds :std
|
206
|
+
alias :variance :variance_sample
|
179
207
|
end
|
180
208
|
end
|
181
209
|
end
|
182
|
-
end
|
210
|
+
end
|