daru 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/lib/daru/index.rb
CHANGED
@@ -2,10 +2,13 @@ module Daru
|
|
2
2
|
class Index
|
3
3
|
include Enumerable
|
4
4
|
|
5
|
-
# needs to iterate over keys sorted by their values. Happens right now by
|
6
|
-
# virtue of ordered Hashes (ruby).
|
7
5
|
def each(&block)
|
8
6
|
@relation_hash.each_key(&block)
|
7
|
+
self
|
8
|
+
end
|
9
|
+
|
10
|
+
def map(&block)
|
11
|
+
to_a.map(&block)
|
9
12
|
end
|
10
13
|
|
11
14
|
attr_reader :relation_hash
|
@@ -19,6 +22,7 @@ module Daru
|
|
19
22
|
|
20
23
|
index = 0 if index.nil?
|
21
24
|
index = Array.new(index) { |i| i} if index.is_a? Integer
|
25
|
+
index = index.to_a if index.is_a? Daru::Index
|
22
26
|
|
23
27
|
if values.nil?
|
24
28
|
index.each_with_index do |n, idx|
|
@@ -36,7 +40,6 @@ module Daru
|
|
36
40
|
end
|
37
41
|
|
38
42
|
@relation_hash.freeze
|
39
|
-
|
40
43
|
@size = @relation_hash.size
|
41
44
|
|
42
45
|
if index[0].is_a?(Integer)
|
@@ -49,22 +52,28 @@ module Daru
|
|
49
52
|
def ==(other)
|
50
53
|
return false if other.size != @size
|
51
54
|
|
52
|
-
@relation_hash.keys == other.to_a
|
55
|
+
@relation_hash.keys == other.to_a and @relation_hash.values == other.relation_hash.values
|
53
56
|
end
|
54
57
|
|
55
58
|
def [](key)
|
56
59
|
case key
|
57
60
|
when Range
|
58
|
-
first
|
59
|
-
|
61
|
+
if key.first.is_a?(Integer) and key.last.is_a?(Integer)
|
62
|
+
first = key.first
|
63
|
+
last = key.last
|
64
|
+
else
|
65
|
+
first = @relation_hash[key.first]
|
66
|
+
last = @relation_hash[key.last]
|
67
|
+
end
|
60
68
|
|
61
69
|
indexes = []
|
62
|
-
|
63
70
|
(first..last).each do |idx|
|
64
71
|
indexes << @relation_hash.key(idx)
|
65
72
|
end
|
66
73
|
|
67
74
|
Daru::Index.new indexes, (first..last).to_a
|
75
|
+
when Array # works only with numeric indices
|
76
|
+
Daru::Index.new key.map { |k| @relation_hash.key(k) }, key
|
68
77
|
else
|
69
78
|
@relation_hash[key]
|
70
79
|
end
|
@@ -85,11 +94,7 @@ module Daru
|
|
85
94
|
end
|
86
95
|
|
87
96
|
def key(value)
|
88
|
-
@relation_hash.
|
89
|
-
end
|
90
|
-
|
91
|
-
def re_index new_index
|
92
|
-
new_index.to_index
|
97
|
+
@relation_hash.keys[value]
|
93
98
|
end
|
94
99
|
|
95
100
|
def include? index
|
@@ -99,9 +104,5 @@ module Daru
|
|
99
104
|
def dup
|
100
105
|
Daru::Index.new @relation_hash.keys
|
101
106
|
end
|
102
|
-
|
103
|
-
def to_index
|
104
|
-
self
|
105
|
-
end
|
106
107
|
end
|
107
108
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -7,23 +7,16 @@ module Daru
|
|
7
7
|
opts[:converters] ||= :numeric
|
8
8
|
opts[:header_converters] ||= :symbol
|
9
9
|
|
10
|
-
csv = CSV.
|
10
|
+
csv = CSV.read(path, 'r', opts)
|
11
11
|
|
12
12
|
yield csv if block_given?
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
csv.each_with_index do |row, index|
|
18
|
-
if first
|
19
|
-
df = Daru::DataFrame.new({}, order: csv.headers, name: opts[:name])
|
20
|
-
first = false
|
21
|
-
end
|
22
|
-
|
23
|
-
df.row[index] = row.fields
|
14
|
+
hsh = {}
|
15
|
+
csv.by_col!.each do |col_name, values|
|
16
|
+
hsh[col_name] = values
|
24
17
|
end
|
25
18
|
|
26
|
-
|
19
|
+
Daru::DataFrame.new(hsh)
|
27
20
|
end
|
28
21
|
end
|
29
22
|
end
|
@@ -1,26 +1,90 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
|
+
# Module encapsulating all arithmetic methods on DataFrame.
|
3
4
|
module Arithmetic
|
4
|
-
module DataFrame
|
5
|
-
|
5
|
+
module DataFrame
|
6
|
+
|
7
|
+
# Add a scalar or another DataFrame
|
6
8
|
def + other
|
7
|
-
|
9
|
+
binary_operation :+, other
|
8
10
|
end
|
9
11
|
|
10
|
-
|
11
|
-
|
12
|
+
# Subtract a scalar or another DataFrame.
|
13
|
+
def - other
|
14
|
+
binary_operation :-, other
|
12
15
|
end
|
13
16
|
|
17
|
+
# Multiply a scalar or another DataFrame.
|
14
18
|
def * other
|
15
|
-
|
19
|
+
binary_operation :*, other
|
16
20
|
end
|
17
21
|
|
22
|
+
# Divide a scalar or another DataFrame.
|
18
23
|
def / other
|
19
|
-
|
24
|
+
binary_operation :/, other
|
20
25
|
end
|
21
26
|
|
27
|
+
# Modulus with a scalar or another DataFrame.
|
22
28
|
def % other
|
23
|
-
|
29
|
+
binary_operation :%, other
|
30
|
+
end
|
31
|
+
|
32
|
+
# Exponent with a scalar or another DataFrame.
|
33
|
+
def ** other
|
34
|
+
binary_operation :**, other
|
35
|
+
end
|
36
|
+
|
37
|
+
# Calculate exponential of all vectors with numeric values.
|
38
|
+
def exp
|
39
|
+
self.dup.map_vectors! { |v| v.exp if v.type == :numeric }
|
40
|
+
end
|
41
|
+
|
42
|
+
def sqrt
|
43
|
+
self.dup.map_vectors! { |v| v.sqrt if v.type == :numeric }
|
44
|
+
end
|
45
|
+
|
46
|
+
def round precision=0
|
47
|
+
self.dup.map_vectors! { |v| v.round(precision) if v.type == :numeric }
|
48
|
+
end
|
49
|
+
private
|
50
|
+
|
51
|
+
def binary_operation operation, other
|
52
|
+
case other
|
53
|
+
when Daru::DataFrame
|
54
|
+
dataframe_binary_operation operation, other
|
55
|
+
else
|
56
|
+
scalar_binary_operation operation, other
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def dataframe_binary_operation operation, other
|
61
|
+
all_vectors = (self.vectors.to_a | other.vectors.to_a).sort
|
62
|
+
all_indexes = (self.index.to_a | other.index.to_a).sort
|
63
|
+
|
64
|
+
hsh = {}
|
65
|
+
all_vectors.each do |vector_name|
|
66
|
+
this = self .has_vector?(vector_name) ? self .vector[vector_name] : nil
|
67
|
+
that = other.has_vector?(vector_name) ? other.vector[vector_name] : nil
|
68
|
+
|
69
|
+
if this and that
|
70
|
+
hsh[vector_name] = this.send(operation, that)
|
71
|
+
else
|
72
|
+
hsh[vector_name] = Daru::Vector.new([], index: all_indexes,
|
73
|
+
name: vector_name)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
78
|
+
end
|
79
|
+
|
80
|
+
def scalar_binary_operation operation, other
|
81
|
+
clone = self.dup
|
82
|
+
clone.map_vectors! do |vector|
|
83
|
+
vector = vector.send(operation, other) if vector.type == :numeric
|
84
|
+
vector
|
85
|
+
end
|
86
|
+
|
87
|
+
clone
|
24
88
|
end
|
25
89
|
end
|
26
90
|
end
|
@@ -27,19 +27,27 @@ module Daru
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def exp
|
30
|
-
|
30
|
+
math_unary_op :exp
|
31
31
|
end
|
32
32
|
|
33
33
|
def sqrt
|
34
|
-
|
34
|
+
math_unary_op :sqrt
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
38
|
-
|
37
|
+
def abs
|
38
|
+
self.dup.map! { |e| e.abs unless e.nil? }
|
39
|
+
end
|
40
|
+
|
41
|
+
def round precision=0
|
42
|
+
self.dup.map! { |e| e.round(precision) unless e.nil? }
|
39
43
|
end
|
40
44
|
|
41
45
|
private
|
42
46
|
|
47
|
+
def math_unary_op operation
|
48
|
+
self.dup.map! { |e| Math.send(operation, e) unless e.nil? }
|
49
|
+
end
|
50
|
+
|
43
51
|
def binary_op operation, other
|
44
52
|
case other
|
45
53
|
when Daru::Vector
|
@@ -50,20 +58,25 @@ module Daru
|
|
50
58
|
end
|
51
59
|
|
52
60
|
def v2o_binary operation, other
|
53
|
-
Daru::Vector.new self.map { |e| e.send(operation, other) },
|
61
|
+
Daru::Vector.new self.map { |e| e.nil? ? nil : e.send(operation, other) },
|
62
|
+
name: @name, index: @index
|
54
63
|
end
|
55
64
|
|
56
65
|
def v2v_binary operation, other
|
57
66
|
common_idxs = []
|
58
67
|
elements = []
|
68
|
+
index = (@index.to_a + other.index.to_a).uniq.sort
|
59
69
|
|
60
|
-
|
70
|
+
index.each do |idx|
|
61
71
|
this = self[idx]
|
62
72
|
that = other[idx]
|
63
73
|
|
64
74
|
if this and that
|
65
75
|
elements << this.send(operation ,that)
|
66
76
|
common_idxs << idx
|
77
|
+
else
|
78
|
+
elements << nil
|
79
|
+
common_idxs << idx
|
67
80
|
end
|
68
81
|
end
|
69
82
|
|
@@ -2,8 +2,109 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
module Statistics
|
4
4
|
module DataFrame
|
5
|
-
|
6
|
-
|
5
|
+
# Calculate mean of numeric vectors.
|
6
|
+
def mean
|
7
|
+
compute_stats :mean
|
8
|
+
end
|
9
|
+
|
10
|
+
# Calculate sample standard deviation of numeric vectors.
|
11
|
+
def std
|
12
|
+
compute_stats :std
|
13
|
+
end
|
14
|
+
|
15
|
+
# Calculate sum of numeric vectors
|
16
|
+
def sum
|
17
|
+
compute_stats :sum
|
18
|
+
end
|
19
|
+
|
20
|
+
# Count the number of non-nil values in each vector.
|
21
|
+
def count
|
22
|
+
compute_stats :count
|
23
|
+
end
|
24
|
+
|
25
|
+
# Calculate the maximum value of each numeric vector.
|
26
|
+
def max
|
27
|
+
compute_stats :max
|
28
|
+
end
|
29
|
+
|
30
|
+
# Calculate the minimum value of each numeric vector.
|
31
|
+
def min
|
32
|
+
compute_stats :min
|
33
|
+
end
|
34
|
+
|
35
|
+
# Compute the product of each numeric vector.
|
36
|
+
def product
|
37
|
+
compute_stats :product
|
38
|
+
end
|
39
|
+
|
40
|
+
# Create a summary of mean, standard deviation, count, max and min of
|
41
|
+
# each numeric vector in the dataframe in one shot.
|
42
|
+
#
|
43
|
+
# == Arguments
|
44
|
+
#
|
45
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
46
|
+
# be applied to numeric vectors. Default is [:count, :mean, :std, :min,
|
47
|
+
# :max]. Methods will be applied in the specified order.
|
48
|
+
def describe methods=nil
|
49
|
+
methods ||= [:count, :mean, :std, :min, :max]
|
50
|
+
|
51
|
+
description_hash = {}
|
52
|
+
numeric_vectors.each do |vec|
|
53
|
+
description_hash[vec] = methods.map { |m| self[vec].send(m) }
|
54
|
+
end
|
55
|
+
Daru::DataFrame.new(description_hash, index: methods)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Calculate variance-covariance between the numeric vectors.
|
59
|
+
#
|
60
|
+
# == Arguments
|
61
|
+
#
|
62
|
+
# +for_sample_data+ - If set to false, will calculate the population
|
63
|
+
# covariance (denominator N), otherwise calculates the sample covariance
|
64
|
+
# matrix. Defaults to true.
|
65
|
+
def covariance for_sample_data=true
|
66
|
+
cov_arry =
|
67
|
+
if defined? NMatrix and NMatrix.respond_to?(:cov)
|
68
|
+
to_nmatrix.cov(for_sample_data).to_a
|
69
|
+
else
|
70
|
+
df_as_matrix = to_matrix
|
71
|
+
denominator = for_sample_data ? rows - 1 : rows
|
72
|
+
ones = Matrix.column_vector [1]*rows
|
73
|
+
deviation_scores = df_as_matrix - (ones * ones.transpose * df_as_matrix) / rows
|
74
|
+
((deviation_scores.transpose * deviation_scores) / denominator).to_a
|
75
|
+
end
|
76
|
+
|
77
|
+
Daru::DataFrame.rows(cov_arry, index: numeric_vectors, order: numeric_vectors)
|
78
|
+
end
|
79
|
+
|
80
|
+
alias :cov :covariance
|
81
|
+
|
82
|
+
# Calculate the correlation between the numeric vectors.
|
83
|
+
def correlation
|
84
|
+
corr_arry =
|
85
|
+
if defined? NMatrix and NMatrix.respond_to?(:corr)
|
86
|
+
to_nmatrix.corr.to_a
|
87
|
+
else
|
88
|
+
standard_deviation = std.to_matrix
|
89
|
+
(cov.to_matrix.elementwise_division(standard_deviation.transpose *
|
90
|
+
standard_deviation)).to_a
|
91
|
+
end
|
92
|
+
|
93
|
+
Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors)
|
94
|
+
end
|
95
|
+
|
96
|
+
alias :corr :correlation
|
97
|
+
|
98
|
+
private
|
99
|
+
|
100
|
+
def compute_stats method
|
101
|
+
Daru::Vector.new(
|
102
|
+
numeric_vectors.inject({}) do |hash, vec|
|
103
|
+
hash[vec] = self[vec].send(method)
|
104
|
+
hash
|
105
|
+
end
|
106
|
+
)
|
107
|
+
end
|
7
108
|
end
|
8
109
|
end
|
9
110
|
end
|
@@ -1,47 +1,59 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
|
+
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
5
|
+
# the computationally intensive tasks.
|
3
6
|
module Statistics
|
4
7
|
module Vector
|
5
|
-
|
6
8
|
def mean
|
7
|
-
@
|
9
|
+
@data.mean
|
8
10
|
end
|
9
11
|
|
10
|
-
def
|
11
|
-
@
|
12
|
+
def sum
|
13
|
+
@data.sum
|
12
14
|
end
|
13
15
|
|
14
|
-
def
|
15
|
-
@
|
16
|
+
def product
|
17
|
+
@data.product
|
16
18
|
end
|
17
19
|
|
18
|
-
def
|
19
|
-
@
|
20
|
+
def min
|
21
|
+
@data.min
|
20
22
|
end
|
21
23
|
|
22
|
-
def
|
23
|
-
|
24
|
+
def range
|
25
|
+
max - min
|
26
|
+
end
|
27
|
+
|
28
|
+
def median
|
29
|
+
percentile 50
|
30
|
+
end
|
31
|
+
|
32
|
+
def mode
|
33
|
+
freqs = frequencies.values
|
34
|
+
@data[freqs.index(freqs.max)]
|
24
35
|
end
|
25
36
|
|
26
37
|
def median_absolute_deviation
|
27
|
-
|
38
|
+
m = median
|
39
|
+
map {|val| (val - m).abs }.median
|
28
40
|
end
|
29
41
|
|
30
42
|
def standard_error
|
31
|
-
@
|
43
|
+
standard_deviation_sample/(Math::sqrt((@size - @nil_positions.size)))
|
32
44
|
end
|
33
45
|
|
34
46
|
def sum_of_squared_deviation
|
35
|
-
@
|
47
|
+
(@data.to_a.inject(0) { |a,x| x.square + a } - (sum.square.quo((@size - @nil_positions.size)))).to_f
|
36
48
|
end
|
37
49
|
|
38
50
|
# Maximum element of the vector.
|
39
51
|
#
|
40
52
|
# @param return_type [Symbol] Data type of the returned value. Defaults
|
41
|
-
#
|
42
|
-
#
|
53
|
+
# to returning only the maximum number but passing *:vector* will return
|
54
|
+
# a Daru::Vector with the index of the corresponding maximum value.
|
43
55
|
def max return_type=:stored_type
|
44
|
-
max_value = @
|
56
|
+
max_value = @data.max
|
45
57
|
if return_type == :vector
|
46
58
|
Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
|
47
59
|
else
|
@@ -49,98 +61,127 @@ module Daru
|
|
49
61
|
end
|
50
62
|
end
|
51
63
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
def has_missing_data?
|
57
|
-
@vector.has_missing_data?
|
58
|
-
end
|
59
|
-
|
60
|
-
def range
|
61
|
-
@vector.range
|
64
|
+
# Return a Vector with the max element and its index.
|
65
|
+
# @return [Daru::Vector]
|
66
|
+
def max_index
|
67
|
+
max :vector
|
62
68
|
end
|
63
69
|
|
64
70
|
def frequencies
|
65
|
-
@
|
71
|
+
@data.inject({}) do |hash, element|
|
72
|
+
hash[element] ||= 0
|
73
|
+
hash[element] += 1
|
74
|
+
hash
|
75
|
+
end
|
66
76
|
end
|
67
77
|
|
68
78
|
def proportions
|
69
|
-
|
79
|
+
len = n_valid
|
80
|
+
frequencies.inject({}) { |hash, arr| hash[arr[0]] = arr[1] / len; hash }
|
70
81
|
end
|
71
82
|
|
72
83
|
def ranked
|
73
|
-
|
84
|
+
sum = 0
|
85
|
+
r = frequencies.sort.inject( {} ) do |memo, val|
|
86
|
+
memo[val[0]] = ((sum + 1) + (sum + val[1])) / 2
|
87
|
+
sum += val[1]
|
88
|
+
memo
|
89
|
+
end
|
90
|
+
|
91
|
+
Daru::Vector.new @data.map { |e| r[e] }, index: self.index,
|
92
|
+
name: self.name, dtype: self.dtype, nm_dtype: self.nm_dtype
|
74
93
|
end
|
75
94
|
|
76
95
|
def coefficient_of_variation
|
77
|
-
|
96
|
+
standard_deviation_sample / mean
|
78
97
|
end
|
79
98
|
|
80
|
-
# Retrieves number of cases which comply condition.
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
84
|
-
# this value.
|
99
|
+
# Retrieves number of cases which comply with the condition. If block given,
|
100
|
+
# retrieves number of instances where block returns true. If other
|
101
|
+
# values given, retrieves the frequency for this value. If no value
|
102
|
+
# given, counts the number of non-nil elements in the Vector.
|
85
103
|
def count value=false
|
86
|
-
|
104
|
+
if block_given?
|
105
|
+
@data.inject(0){ |memo, val| memo += 1 if yield val; memo}
|
106
|
+
elsif value
|
107
|
+
val = frequencies[value]
|
108
|
+
val.nil? ? 0 : val
|
109
|
+
else
|
110
|
+
size - @nil_positions.size
|
111
|
+
end
|
87
112
|
end
|
88
113
|
|
89
114
|
def proportion value=1
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
# Population variance with denominator (N)
|
94
|
-
def variance_population m=nil
|
95
|
-
@vector.variance_population m
|
115
|
+
frequencies[value] / n_valid
|
96
116
|
end
|
97
117
|
|
98
118
|
# Sample variance with denominator (N-1)
|
99
119
|
def variance_sample m=nil
|
100
|
-
|
120
|
+
m ||= self.mean
|
121
|
+
sum_of_squares(m).quo((@size - @nil_positions.size) - 1)
|
101
122
|
end
|
102
123
|
|
103
|
-
|
104
|
-
|
124
|
+
# Population variance with denominator (N)
|
125
|
+
def variance_population m=nil
|
126
|
+
m ||= mean
|
127
|
+
sum_of_squares(m).quo((@size - @nil_positions.size)).to_f
|
105
128
|
end
|
106
129
|
|
107
|
-
def
|
108
|
-
|
130
|
+
def sum_of_squares(m=nil)
|
131
|
+
m ||= mean
|
132
|
+
@data.inject(0) { |memo, val| memo + (val - m)**2 }
|
109
133
|
end
|
110
134
|
|
111
135
|
def standard_deviation_population m=nil
|
112
|
-
|
136
|
+
m ||= mean
|
137
|
+
Math::sqrt(variance_population(m))
|
138
|
+
end
|
139
|
+
|
140
|
+
def standard_deviation_sample m=nil
|
141
|
+
Math::sqrt(variance_sample(m))
|
113
142
|
end
|
114
143
|
|
115
144
|
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
116
145
|
def skew m=nil
|
117
|
-
|
146
|
+
m ||= mean
|
147
|
+
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
148
|
+
th.quo ((@size - @nil_positions.size) * (standard_deviation_sample(m)**3))
|
118
149
|
end
|
119
150
|
|
120
151
|
def kurtosis m=nil
|
121
|
-
|
152
|
+
m ||= mean
|
153
|
+
fo = @data.inject(0){ |a, x| a + ((x - m) ** 4) }
|
154
|
+
fo.quo((@size - @nil_positions.size) * standard_deviation_sample(m) ** 4) - 3
|
122
155
|
end
|
123
156
|
|
124
157
|
def average_deviation_population m=nil
|
125
|
-
|
158
|
+
m ||= mean
|
159
|
+
(@data.inject(0) {|memo, val| val + (val - m).abs }) / n_valid
|
126
160
|
end
|
127
161
|
|
128
162
|
def recode!(&block)
|
129
|
-
@
|
163
|
+
@data.recode!(&block)
|
130
164
|
end
|
131
165
|
|
132
166
|
def percentile percent
|
133
|
-
@
|
167
|
+
sorted = @data.sort
|
168
|
+
v = (n_valid * percent).quo(100)
|
169
|
+
if v.to_i != v
|
170
|
+
sorted[v.round]
|
171
|
+
else
|
172
|
+
(sorted[(v - 0.5).round].to_f + sorted[(v + 0.5).round]).quo(2)
|
173
|
+
end
|
134
174
|
end
|
135
175
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
176
|
+
alias :sdp :standard_deviation_population
|
177
|
+
alias :sds :standard_deviation_sample
|
178
|
+
alias :std :sds
|
179
|
+
alias :adp :average_deviation_population
|
180
|
+
alias :cov :coefficient_of_variation
|
181
|
+
alias :variance :variance_sample
|
182
|
+
alias :sd :standard_deviation_sample
|
183
|
+
alias :ss :sum_of_squares
|
184
|
+
alias :percentil :percentile
|
144
185
|
end
|
145
186
|
end
|
146
187
|
end
|