daru 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +99 -0
- data/.rubocop_todo.yml +44 -0
- data/.travis.yml +3 -1
- data/CONTRIBUTING.md +5 -1
- data/History.md +43 -0
- data/README.md +3 -4
- data/benchmarks/duplicating.rb +45 -0
- data/benchmarks/group_by.rb +7 -7
- data/benchmarks/joining.rb +52 -0
- data/benchmarks/sorting.rb +9 -2
- data/benchmarks/statistics.rb +39 -0
- data/daru.gemspec +4 -4
- data/lib/daru.rb +9 -9
- data/lib/daru/accessors/array_wrapper.rb +15 -11
- data/lib/daru/accessors/dataframe_by_row.rb +1 -1
- data/lib/daru/accessors/gsl_wrapper.rb +30 -19
- data/lib/daru/accessors/mdarray_wrapper.rb +1 -3
- data/lib/daru/accessors/nmatrix_wrapper.rb +15 -15
- data/lib/daru/core/group_by.rb +69 -16
- data/lib/daru/core/merge.rb +135 -151
- data/lib/daru/core/query.rb +9 -30
- data/lib/daru/dataframe.rb +476 -439
- data/lib/daru/date_time/index.rb +150 -137
- data/lib/daru/date_time/offsets.rb +45 -41
- data/lib/daru/extensions/rserve.rb +4 -4
- data/lib/daru/index.rb +88 -64
- data/lib/daru/io/io.rb +33 -34
- data/lib/daru/io/sql_data_source.rb +11 -11
- data/lib/daru/maths/arithmetic/dataframe.rb +19 -19
- data/lib/daru/maths/arithmetic/vector.rb +9 -14
- data/lib/daru/maths/statistics/dataframe.rb +89 -61
- data/lib/daru/maths/statistics/vector.rb +226 -97
- data/lib/daru/monkeys.rb +23 -30
- data/lib/daru/plotting/dataframe.rb +27 -28
- data/lib/daru/plotting/vector.rb +12 -13
- data/lib/daru/vector.rb +221 -330
- data/lib/daru/version.rb +2 -2
- data/spec/core/group_by_spec.rb +16 -0
- data/spec/core/merge_spec.rb +30 -14
- data/spec/dataframe_spec.rb +268 -14
- data/spec/index_spec.rb +23 -5
- data/spec/io/io_spec.rb +37 -16
- data/spec/math/statistics/dataframe_spec.rb +40 -8
- data/spec/math/statistics/vector_spec.rb +135 -10
- data/spec/monkeys_spec.rb +3 -3
- data/spec/vector_spec.rb +157 -25
- metadata +41 -21
data/lib/daru/io/io.rb
CHANGED
@@ -5,12 +5,10 @@ module Daru
|
|
5
5
|
row.to_a.map do |c|
|
6
6
|
if empty.include?(c)
|
7
7
|
nil
|
8
|
+
elsif c.is_a?(String) && c.is_number?
|
9
|
+
c =~ /^\d+$/ ? c.to_i : c.tr(',','.').to_f
|
8
10
|
else
|
9
|
-
|
10
|
-
c =~ /^\d+$/ ? c.to_i : c.gsub(",",".").to_f
|
11
|
-
else
|
12
|
-
c
|
13
|
-
end
|
11
|
+
c
|
14
12
|
end
|
15
13
|
end
|
16
14
|
end
|
@@ -23,7 +21,7 @@ module Daru
|
|
23
21
|
|
24
22
|
def from_excel path, opts={}
|
25
23
|
opts = {
|
26
|
-
:
|
24
|
+
worksheet_id: 0
|
27
25
|
}.merge opts
|
28
26
|
|
29
27
|
worksheet_id = opts[:worksheet_id]
|
@@ -41,10 +39,10 @@ module Daru
|
|
41
39
|
df
|
42
40
|
end
|
43
41
|
|
44
|
-
def dataframe_write_excel dataframe, path,
|
42
|
+
def dataframe_write_excel dataframe, path, _opts={}
|
45
43
|
book = Spreadsheet::Workbook.new
|
46
44
|
sheet = book.create_worksheet
|
47
|
-
format = Spreadsheet::Format.new :
|
45
|
+
format = Spreadsheet::Format.new color: :blue, weight: :bold
|
48
46
|
|
49
47
|
sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
|
50
48
|
sheet.row(0).default_format = format
|
@@ -62,18 +60,26 @@ module Daru
|
|
62
60
|
opts[:col_sep] ||= ','
|
63
61
|
opts[:converters] ||= :numeric
|
64
62
|
|
65
|
-
daru_options = opts.keys.
|
63
|
+
daru_options = opts.keys.each_with_object({}) do |hash, k|
|
66
64
|
if [:clone, :order, :index, :name].include?(k)
|
67
65
|
hash[k] = opts[k]
|
68
66
|
opts.delete k
|
69
67
|
end
|
70
|
-
|
71
|
-
hash
|
72
68
|
end
|
73
69
|
|
74
|
-
# Preprocess headers for detecting and correcting repetition in
|
70
|
+
# Preprocess headers for detecting and correcting repetition in
|
75
71
|
# case the :headers option is not specified.
|
76
|
-
|
72
|
+
if opts[:headers]
|
73
|
+
opts[:header_converters] ||= :symbol
|
74
|
+
|
75
|
+
csv = ::CSV.read(path, 'rb',opts)
|
76
|
+
yield csv if block_given?
|
77
|
+
|
78
|
+
hsh = {}
|
79
|
+
csv.by_col.each do |col_name, values|
|
80
|
+
hsh[col_name] = values
|
81
|
+
end
|
82
|
+
else
|
77
83
|
csv = ::CSV.open(path, 'rb', opts)
|
78
84
|
yield csv if block_given?
|
79
85
|
|
@@ -86,16 +92,9 @@ module Daru
|
|
86
92
|
headers.each_with_index do |h, i|
|
87
93
|
hsh[h] = csv_as_arrays[i]
|
88
94
|
end
|
89
|
-
else
|
90
|
-
opts[:header_converters] ||= :symbol
|
91
|
-
|
92
|
-
csv = ::CSV.read(path, 'rb',opts)
|
93
|
-
yield csv if block_given?
|
94
95
|
|
95
|
-
|
96
|
-
|
97
|
-
hsh[col_name] = values
|
98
|
-
end
|
96
|
+
# Order columns as given in CSV
|
97
|
+
daru_options[:order] = headers.to_a
|
99
98
|
end
|
100
99
|
|
101
100
|
Daru::DataFrame.new(hsh,daru_options)
|
@@ -107,14 +106,14 @@ module Daru
|
|
107
106
|
}.merge(opts)
|
108
107
|
|
109
108
|
writer = ::CSV.open(path, 'w', options)
|
110
|
-
writer << dataframe.vectors.to_a
|
109
|
+
writer << dataframe.vectors.to_a unless options[:headers] == false
|
111
110
|
|
112
111
|
dataframe.each_row do |row|
|
113
|
-
if options[:convert_comma]
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
112
|
+
writer << if options[:convert_comma]
|
113
|
+
row.map { |v| v.to_s.tr('.', ',') }
|
114
|
+
else
|
115
|
+
row.to_a
|
116
|
+
end
|
118
117
|
end
|
119
118
|
|
120
119
|
writer.close
|
@@ -134,10 +133,10 @@ module Daru
|
|
134
133
|
|
135
134
|
def dataframe_write_sql ds, dbh, table
|
136
135
|
require 'dbi'
|
137
|
-
query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(
|
138
|
-
sth =
|
136
|
+
query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')'
|
137
|
+
sth = dbh.prepare(query)
|
139
138
|
ds.each_row { |c| sth.execute(*c.to_a) }
|
140
|
-
|
139
|
+
true
|
141
140
|
end
|
142
141
|
|
143
142
|
# Load dataframe from AR::Relation
|
@@ -158,7 +157,7 @@ module Daru
|
|
158
157
|
vectors = Hash[*fields.map { |name|
|
159
158
|
[
|
160
159
|
name,
|
161
|
-
Daru::Vector.new([]).tap {|v| v.rename name }
|
160
|
+
Daru::Vector.new([]).tap { |v| v.rename name }
|
162
161
|
]
|
163
162
|
}.flatten]
|
164
163
|
|
@@ -174,9 +173,9 @@ module Daru
|
|
174
173
|
|
175
174
|
def from_plaintext filename, fields
|
176
175
|
ds = Daru::DataFrame.new({}, order: fields)
|
177
|
-
fp = File.open(filename,
|
176
|
+
fp = File.open(filename,'r')
|
178
177
|
fp.each_line do |line|
|
179
|
-
row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[
|
178
|
+
row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[''])
|
180
179
|
next if row == ["\x1A"]
|
181
180
|
ds.add_row(row)
|
182
181
|
end
|
@@ -9,15 +9,15 @@ module Daru
|
|
9
9
|
@query = query
|
10
10
|
end
|
11
11
|
|
12
|
-
def each_column_name
|
12
|
+
def each_column_name
|
13
13
|
result.column_names.each do |column_name|
|
14
|
-
|
14
|
+
yield(column_name.to_sym)
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
def each_row
|
18
|
+
def each_row
|
19
19
|
result.fetch do |row|
|
20
|
-
|
20
|
+
yield(row.to_a)
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
@@ -36,15 +36,15 @@ module Daru
|
|
36
36
|
@query = query
|
37
37
|
end
|
38
38
|
|
39
|
-
def each_column_name
|
39
|
+
def each_column_name
|
40
40
|
result.columns.each do |column_name|
|
41
|
-
|
41
|
+
yield(column_name.to_sym)
|
42
42
|
end
|
43
43
|
end
|
44
44
|
|
45
|
-
def each_row
|
45
|
+
def each_row
|
46
46
|
result.each do |row|
|
47
|
-
|
47
|
+
yield(row.values)
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
@@ -59,7 +59,7 @@ module Daru
|
|
59
59
|
private_constant :ActiveRecordConnectionAdapter
|
60
60
|
|
61
61
|
def self.make_dataframe(db, query)
|
62
|
-
|
62
|
+
new(db, query).make_dataframe
|
63
63
|
end
|
64
64
|
|
65
65
|
def initialize(db, query)
|
@@ -105,11 +105,11 @@ module Daru
|
|
105
105
|
end
|
106
106
|
|
107
107
|
def check_dbi(obj)
|
108
|
-
DBI::DatabaseHandle
|
108
|
+
obj.is_a?(DBI::DatabaseHandle)
|
109
109
|
end
|
110
110
|
|
111
111
|
def check_active_record_connection(obj)
|
112
|
-
ActiveRecord::ConnectionAdapters::AbstractAdapter
|
112
|
+
obj.is_a?(ActiveRecord::ConnectionAdapters::AbstractAdapter)
|
113
113
|
end
|
114
114
|
end
|
115
115
|
end
|
@@ -2,15 +2,14 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
# Module encapsulating all aritmetic methods on DataFrame.
|
4
4
|
module Arithmetic
|
5
|
-
module DataFrame
|
6
|
-
|
5
|
+
module DataFrame
|
7
6
|
# Add a scalar or another DataFrame
|
8
7
|
def + other
|
9
8
|
binary_operation :+, other
|
10
9
|
end
|
11
10
|
|
12
11
|
# Subtract a scalar or another DataFrame.
|
13
|
-
def - other
|
12
|
+
def - other
|
14
13
|
binary_operation :-, other
|
15
14
|
end
|
16
15
|
|
@@ -19,7 +18,7 @@ module Daru
|
|
19
18
|
binary_operation :*, other
|
20
19
|
end
|
21
20
|
|
22
|
-
# Divide a scalar or another DataFrame.
|
21
|
+
# Divide a scalar or another DataFrame.
|
23
22
|
def / other
|
24
23
|
binary_operation :/, other
|
25
24
|
end
|
@@ -36,18 +35,19 @@ module Daru
|
|
36
35
|
|
37
36
|
# Calculate exponenential of all vectors with numeric values.
|
38
37
|
def exp
|
39
|
-
only_numerics(clone: false).recode
|
38
|
+
only_numerics(clone: false).recode(&:exp)
|
40
39
|
end
|
41
40
|
|
42
41
|
# Calcuate square root of numeric vectors.
|
43
42
|
def sqrt
|
44
|
-
only_numerics(clone: false).recode
|
43
|
+
only_numerics(clone: false).recode(&:sqrt)
|
45
44
|
end
|
46
45
|
|
47
46
|
def round precision=0
|
48
47
|
only_numerics(clone: false).recode { |v| v.round(precision) }
|
49
48
|
end
|
50
|
-
|
49
|
+
|
50
|
+
private
|
51
51
|
|
52
52
|
def binary_operation operation, other
|
53
53
|
case other
|
@@ -59,27 +59,27 @@ module Daru
|
|
59
59
|
end
|
60
60
|
|
61
61
|
def dataframe_binary_operation operation, other
|
62
|
-
all_vectors = (
|
63
|
-
all_indexes = (
|
62
|
+
all_vectors = (vectors.to_a | other.vectors.to_a).sort
|
63
|
+
all_indexes = (index.to_a | other.index.to_a).sort
|
64
64
|
|
65
65
|
hsh = {}
|
66
66
|
all_vectors.each do |vector_name|
|
67
|
-
this =
|
67
|
+
this = has_vector?(vector_name) ? self[vector_name] : nil
|
68
68
|
that = other.has_vector?(vector_name) ? other[vector_name] : nil
|
69
69
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
name: vector_name)
|
75
|
-
|
70
|
+
hsh[vector_name] =
|
71
|
+
if this && that
|
72
|
+
this.send(operation, that)
|
73
|
+
else
|
74
|
+
Daru::Vector.new([], index: all_indexes, name: vector_name)
|
75
|
+
end
|
76
76
|
end
|
77
77
|
|
78
|
-
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
78
|
+
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
79
79
|
end
|
80
80
|
|
81
81
|
def scalar_binary_operation operation, other
|
82
|
-
clone =
|
82
|
+
clone = dup
|
83
83
|
clone.map_vectors! do |vector|
|
84
84
|
vector = vector.send(operation, other) if vector.type == :numeric
|
85
85
|
vector
|
@@ -90,4 +90,4 @@ module Daru
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
93
|
-
end
|
93
|
+
end
|
@@ -35,17 +35,17 @@ module Daru
|
|
35
35
|
end
|
36
36
|
|
37
37
|
def abs
|
38
|
-
|
38
|
+
recode { |e| e.abs unless e.nil? }
|
39
39
|
end
|
40
40
|
|
41
41
|
def round precision=0
|
42
|
-
|
42
|
+
recode { |e| e.round(precision) unless e.nil? }
|
43
43
|
end
|
44
44
|
|
45
|
-
|
45
|
+
private
|
46
46
|
|
47
47
|
def math_unary_op operation
|
48
|
-
|
48
|
+
recode { |e| Math.send(operation, e) unless e.nil? }
|
49
49
|
end
|
50
50
|
|
51
51
|
def binary_op operation, other
|
@@ -58,8 +58,8 @@ module Daru
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def v2o_binary operation, other
|
61
|
-
Daru::Vector.new
|
62
|
-
|
61
|
+
Daru::Vector.new map { |e| e.nil? ? nil : e.send(operation, other) },
|
62
|
+
name: @name, index: @index
|
63
63
|
end
|
64
64
|
|
65
65
|
def v2v_binary operation, other
|
@@ -71,13 +71,8 @@ module Daru
|
|
71
71
|
this = self.index.include?(idx) ? self[idx] : nil
|
72
72
|
that = other.index.include?(idx) ? other[idx] : nil
|
73
73
|
|
74
|
-
|
75
|
-
|
76
|
-
common_idxs << idx
|
77
|
-
else
|
78
|
-
elements << nil
|
79
|
-
common_idxs << idx
|
80
|
-
end
|
74
|
+
elements << (this && that ? this.send(operation, that) : nil)
|
75
|
+
common_idxs << idx
|
81
76
|
end
|
82
77
|
|
83
78
|
Daru::Vector.new(elements, name: @name, index: common_idxs)
|
@@ -85,4 +80,4 @@ module Daru
|
|
85
80
|
end
|
86
81
|
end
|
87
82
|
end
|
88
|
-
end
|
83
|
+
end
|
@@ -2,45 +2,41 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
module Statistics
|
4
4
|
module DataFrame
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
#
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
#
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
#
|
21
|
-
|
22
|
-
|
5
|
+
# @!method mean
|
6
|
+
# Calculate mean of numeric vectors
|
7
|
+
# @!method variance_sample
|
8
|
+
# Calculate sample variance of numeric vectors
|
9
|
+
# @!method range
|
10
|
+
# Calculate range of numeric vectors
|
11
|
+
# @!method median
|
12
|
+
# Calculate median of numeric vectors
|
13
|
+
# @!method mode
|
14
|
+
# Calculate mode of numeric vectors
|
15
|
+
# @!method std
|
16
|
+
# Calculate sample standard deviation of numeric vectors
|
17
|
+
# @!method sum
|
18
|
+
# Calculate sum of numeric vectors
|
19
|
+
# @!method count
|
20
|
+
# Count the number of non-nil values in each vector
|
21
|
+
# @!method min
|
22
|
+
# Calculate the minimum value of each numeric vector
|
23
|
+
# @!method product
|
24
|
+
# Compute the product of each numeric vector
|
25
|
+
[:mean, :variance_sample, :range, :median, :mode, :std, :sum, :count, :min, :product].each do |meth|
|
26
|
+
define_method(meth) do
|
27
|
+
compute_stats meth
|
28
|
+
end
|
23
29
|
end
|
24
30
|
|
25
31
|
# Calculate the maximum value of each numeric vector.
|
26
32
|
def max opts={}
|
27
33
|
if opts[:vector]
|
28
|
-
|
34
|
+
row[*self[opts[:vector]].max_index.index.to_a]
|
29
35
|
else
|
30
36
|
compute_stats :max
|
31
37
|
end
|
32
38
|
end
|
33
39
|
|
34
|
-
# Calculate the minimmum value of each numeric vector.
|
35
|
-
def min
|
36
|
-
compute_stats :min
|
37
|
-
end
|
38
|
-
|
39
|
-
# Compute the product of each numeric vector.
|
40
|
-
def product
|
41
|
-
compute_stats :product
|
42
|
-
end
|
43
|
-
|
44
40
|
# @!method cumsum
|
45
41
|
# Calculate cumulative sum of each numeric Vector
|
46
42
|
# @!method standardize
|
@@ -51,7 +47,7 @@ module Daru
|
|
51
47
|
# @!method ema
|
52
48
|
# Calculate exponential moving average.
|
53
49
|
# @param [Integer] n (10) Loopback length.
|
54
|
-
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
50
|
+
# @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
|
55
51
|
# 1/n value is used for smoothing; if false, uses 2/(n+1) value.
|
56
52
|
# @!method rolling_mean
|
57
53
|
# Calculate moving averages
|
@@ -74,21 +70,22 @@ module Daru
|
|
74
70
|
# @!method rolling_variance
|
75
71
|
# Calculate moving variance
|
76
72
|
# @param [Integer] n (10) Loopback length. Default to 10.
|
77
|
-
[
|
78
|
-
|
73
|
+
[
|
74
|
+
:cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
|
75
|
+
:rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
|
79
76
|
].each do |meth|
|
80
77
|
define_method(meth) do |*args|
|
81
78
|
apply_method_to_numerics meth, *args
|
82
79
|
end
|
83
80
|
end
|
84
81
|
|
85
|
-
# Create a summary of mean, standard deviation, count, max and min of
|
82
|
+
# Create a summary of mean, standard deviation, count, max and min of
|
86
83
|
# each numeric vector in the dataframe in one shot.
|
87
|
-
#
|
84
|
+
#
|
88
85
|
# == Arguments
|
89
|
-
#
|
90
|
-
# +methods+ - An array with aggregation methods specified as symbols to
|
91
|
-
# be applied to numeric vectors. Default is [:count, :mean, :std, :max,
|
86
|
+
#
|
87
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
88
|
+
# be applied to numeric vectors. Default is [:count, :mean, :std, :max,
|
92
89
|
# :min]. Methods will be applied in the specified order.
|
93
90
|
def describe methods=nil
|
94
91
|
methods ||= [:count, :mean, :std, :min, :max]
|
@@ -100,23 +97,54 @@ module Daru
|
|
100
97
|
Daru::DataFrame.new(description_hash, index: methods)
|
101
98
|
end
|
102
99
|
|
100
|
+
# The percent_change method computes the percent change over
|
101
|
+
# the given number of periods for numeric vectors.
|
102
|
+
#
|
103
|
+
# @param [Integer] periods (1) number of nils to insert at the beginning.
|
104
|
+
#
|
105
|
+
# @example
|
106
|
+
#
|
107
|
+
# df = Daru::DataFrame.new({
|
108
|
+
# 'col0' => [1,2,3,4,5,6],
|
109
|
+
# 'col2' => ['a','b','c','d','e','f'],
|
110
|
+
# 'col1' => [11,22,33,44,55,66]
|
111
|
+
# },
|
112
|
+
# index: ['one', 'two', 'three', 'four', 'five', 'six'],
|
113
|
+
# order: ['col0', 'col1', 'col2'])
|
114
|
+
# df.percent_change
|
115
|
+
# #=>
|
116
|
+
# # <Daru::DataFrame:23513280 @rows: 6 @cols: 2>
|
117
|
+
# # col0 col1
|
118
|
+
# # one
|
119
|
+
# # two 1.0 1.0
|
120
|
+
# # three 0.5 0.5
|
121
|
+
# # four 0.3333333333333333 0.3333333333333333
|
122
|
+
# # five 0.25 0.25
|
123
|
+
# # six 0.2 0.2
|
124
|
+
def percent_change periods=1
|
125
|
+
df_numeric = only_numerics.vectors.to_a
|
126
|
+
df = Daru::DataFrame.new({}, order: @order, index: @index, name: @name)
|
127
|
+
df_numeric.each do |vec|
|
128
|
+
df[vec] = self[vec].percent_change periods
|
129
|
+
end
|
130
|
+
df
|
131
|
+
end
|
132
|
+
|
103
133
|
# Calculate sample variance-covariance between the numeric vectors.
|
104
134
|
def covariance
|
105
135
|
cache={}
|
106
|
-
vectors =
|
136
|
+
vectors = numeric_vectors
|
107
137
|
|
108
138
|
mat_rows = vectors.collect do |row|
|
109
139
|
vectors.collect do |col|
|
110
140
|
if row == col
|
111
141
|
self[row].variance
|
142
|
+
elsif cache[[col,row]].nil?
|
143
|
+
cov = vector_cov(self[row],self[col])
|
144
|
+
cache[[row,col]] = cov
|
145
|
+
cov
|
112
146
|
else
|
113
|
-
|
114
|
-
cov = vector_cov(self[row],self[col])
|
115
|
-
cache[[row,col]] = cov
|
116
|
-
cov
|
117
|
-
else
|
118
|
-
cache[[col,row]]
|
119
|
-
end
|
147
|
+
cache[[col,row]]
|
120
148
|
end
|
121
149
|
end
|
122
150
|
end
|
@@ -125,34 +153,33 @@ module Daru
|
|
125
153
|
end
|
126
154
|
|
127
155
|
alias :cov :covariance
|
128
|
-
|
156
|
+
|
129
157
|
# Calculate the correlation between the numeric vectors.
|
130
158
|
def correlation
|
131
159
|
standard_deviation = std.to_matrix
|
132
|
-
corr_arry =
|
133
|
-
|
134
|
-
|
135
|
-
standard_deviation)
|
160
|
+
corr_arry = cov
|
161
|
+
.to_matrix
|
162
|
+
.elementwise_division(standard_deviation.transpose *
|
163
|
+
standard_deviation).to_a
|
136
164
|
|
137
165
|
Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors)
|
138
166
|
end
|
139
167
|
|
140
168
|
alias :corr :correlation
|
141
169
|
|
142
|
-
|
170
|
+
private
|
143
171
|
|
144
172
|
def apply_method_to_numerics method, *args
|
145
173
|
order = []
|
146
|
-
computed = @vectors.to_a.
|
174
|
+
computed = @vectors.to_a.each_with_object([]) do |n, memo|
|
147
175
|
v = @data[@vectors[n]]
|
148
176
|
if v.type == :numeric
|
149
177
|
memo << v.send(method, *args)
|
150
178
|
order << n
|
151
179
|
end
|
152
|
-
memo
|
153
180
|
end
|
154
|
-
|
155
|
-
Daru::DataFrame.new(computed, index: @index, order: order
|
181
|
+
|
182
|
+
Daru::DataFrame.new(computed, index: @index, order: order,clone: false)
|
156
183
|
end
|
157
184
|
|
158
185
|
def vector_cov v1a, v2a
|
@@ -160,23 +187,24 @@ module Daru
|
|
160
187
|
end
|
161
188
|
|
162
189
|
def sum_of_squares v1, v2
|
163
|
-
v1a,v2a = v1.only_valid
|
190
|
+
v1a,v2a = v1.only_valid,v2.only_valid
|
164
191
|
v1a.reset_index!
|
165
|
-
v2a.reset_index!
|
192
|
+
v2a.reset_index!
|
166
193
|
m1 = v1a.mean
|
167
194
|
m2 = v2a.mean
|
168
|
-
|
195
|
+
v1a.size.times.inject(0) { |ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2) }
|
169
196
|
end
|
170
197
|
|
171
198
|
def compute_stats method
|
172
199
|
Daru::Vector.new(
|
173
|
-
numeric_vectors.
|
200
|
+
numeric_vectors.each_with_object({}) do |vec, hash|
|
174
201
|
hash[vec] = self[vec].send(method)
|
175
|
-
hash
|
176
202
|
end, name: method
|
177
203
|
)
|
178
204
|
end
|
205
|
+
alias :sds :std
|
206
|
+
alias :variance :variance_sample
|
179
207
|
end
|
180
208
|
end
|
181
209
|
end
|
182
|
-
end
|
210
|
+
end
|