daru 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CONTRIBUTING.md +0 -0
- data/Gemfile +0 -1
- data/History.txt +35 -0
- data/README.md +178 -198
- data/daru.gemspec +5 -7
- data/lib/daru.rb +10 -2
- data/lib/daru/accessors/array_wrapper.rb +36 -198
- data/lib/daru/accessors/nmatrix_wrapper.rb +60 -209
- data/lib/daru/core/group_by.rb +183 -0
- data/lib/daru/dataframe.rb +615 -167
- data/lib/daru/index.rb +17 -16
- data/lib/daru/io/io.rb +5 -12
- data/lib/daru/maths/arithmetic/dataframe.rb +72 -8
- data/lib/daru/maths/arithmetic/vector.rb +19 -6
- data/lib/daru/maths/statistics/dataframe.rb +103 -2
- data/lib/daru/maths/statistics/vector.rb +102 -61
- data/lib/daru/monkeys.rb +8 -0
- data/lib/daru/multi_index.rb +199 -0
- data/lib/daru/plotting/dataframe.rb +24 -24
- data/lib/daru/plotting/vector.rb +14 -15
- data/lib/daru/vector.rb +402 -98
- data/lib/version.rb +1 -1
- data/notebooks/grouping_splitting_pivots.ipynb +529 -0
- data/notebooks/intro_with_music_data_.ipynb +104 -119
- data/spec/accessors/wrappers_spec.rb +36 -0
- data/spec/core/group_by_spec.rb +331 -0
- data/spec/dataframe_spec.rb +1237 -475
- data/spec/fixtures/sales-funnel.csv +18 -0
- data/spec/index_spec.rb +10 -21
- data/spec/io/io_spec.rb +4 -14
- data/spec/math/arithmetic/dataframe_spec.rb +66 -0
- data/spec/math/arithmetic/vector_spec.rb +45 -4
- data/spec/math/statistics/dataframe_spec.rb +91 -1
- data/spec/math/statistics/vector_spec.rb +32 -6
- data/spec/monkeys_spec.rb +10 -1
- data/spec/multi_index_spec.rb +216 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/vector_spec.rb +505 -57
- metadata +21 -15
data/lib/daru/index.rb
CHANGED
@@ -2,10 +2,13 @@ module Daru
|
|
2
2
|
class Index
|
3
3
|
include Enumerable
|
4
4
|
|
5
|
-
# needs to iterate over keys sorted by their values. Happens right now by
|
6
|
-
# virtue of ordered Hashes (ruby).
|
7
5
|
def each(&block)
|
8
6
|
@relation_hash.each_key(&block)
|
7
|
+
self
|
8
|
+
end
|
9
|
+
|
10
|
+
def map(&block)
|
11
|
+
to_a.map(&block)
|
9
12
|
end
|
10
13
|
|
11
14
|
attr_reader :relation_hash
|
@@ -19,6 +22,7 @@ module Daru
|
|
19
22
|
|
20
23
|
index = 0 if index.nil?
|
21
24
|
index = Array.new(index) { |i| i} if index.is_a? Integer
|
25
|
+
index = index.to_a if index.is_a? Daru::Index
|
22
26
|
|
23
27
|
if values.nil?
|
24
28
|
index.each_with_index do |n, idx|
|
@@ -36,7 +40,6 @@ module Daru
|
|
36
40
|
end
|
37
41
|
|
38
42
|
@relation_hash.freeze
|
39
|
-
|
40
43
|
@size = @relation_hash.size
|
41
44
|
|
42
45
|
if index[0].is_a?(Integer)
|
@@ -49,22 +52,28 @@ module Daru
|
|
49
52
|
def ==(other)
|
50
53
|
return false if other.size != @size
|
51
54
|
|
52
|
-
@relation_hash.keys == other.to_a
|
55
|
+
@relation_hash.keys == other.to_a and @relation_hash.values == other.relation_hash.values
|
53
56
|
end
|
54
57
|
|
55
58
|
def [](key)
|
56
59
|
case key
|
57
60
|
when Range
|
58
|
-
first
|
59
|
-
|
61
|
+
if key.first.is_a?(Integer) and key.last.is_a?(Integer)
|
62
|
+
first = key.first
|
63
|
+
last = key.last
|
64
|
+
else
|
65
|
+
first = @relation_hash[key.first]
|
66
|
+
last = @relation_hash[key.last]
|
67
|
+
end
|
60
68
|
|
61
69
|
indexes = []
|
62
|
-
|
63
70
|
(first..last).each do |idx|
|
64
71
|
indexes << @relation_hash.key(idx)
|
65
72
|
end
|
66
73
|
|
67
74
|
Daru::Index.new indexes, (first..last).to_a
|
75
|
+
when Array # works only with numeric indices
|
76
|
+
Daru::Index.new key.map { |k| @relation_hash.key(k) }, key
|
68
77
|
else
|
69
78
|
@relation_hash[key]
|
70
79
|
end
|
@@ -85,11 +94,7 @@ module Daru
|
|
85
94
|
end
|
86
95
|
|
87
96
|
def key(value)
|
88
|
-
@relation_hash.
|
89
|
-
end
|
90
|
-
|
91
|
-
def re_index new_index
|
92
|
-
new_index.to_index
|
97
|
+
@relation_hash.keys[value]
|
93
98
|
end
|
94
99
|
|
95
100
|
def include? index
|
@@ -99,9 +104,5 @@ module Daru
|
|
99
104
|
def dup
|
100
105
|
Daru::Index.new @relation_hash.keys
|
101
106
|
end
|
102
|
-
|
103
|
-
def to_index
|
104
|
-
self
|
105
|
-
end
|
106
107
|
end
|
107
108
|
end
|
data/lib/daru/io/io.rb
CHANGED
@@ -7,23 +7,16 @@ module Daru
|
|
7
7
|
opts[:converters] ||= :numeric
|
8
8
|
opts[:header_converters] ||= :symbol
|
9
9
|
|
10
|
-
csv = CSV.
|
10
|
+
csv = CSV.read(path, 'r', opts)
|
11
11
|
|
12
12
|
yield csv if block_given?
|
13
13
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
csv.each_with_index do |row, index|
|
18
|
-
if first
|
19
|
-
df = Daru::DataFrame.new({}, order: csv.headers, name: opts[:name])
|
20
|
-
first = false
|
21
|
-
end
|
22
|
-
|
23
|
-
df.row[index] = row.fields
|
14
|
+
hsh = {}
|
15
|
+
csv.by_col!.each do |col_name, values|
|
16
|
+
hsh[col_name] = values
|
24
17
|
end
|
25
18
|
|
26
|
-
|
19
|
+
Daru::DataFrame.new(hsh)
|
27
20
|
end
|
28
21
|
end
|
29
22
|
end
|
@@ -1,26 +1,90 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
|
+
# Module encapsulating all arithmetic methods on DataFrame.
|
3
4
|
module Arithmetic
|
4
|
-
module DataFrame
|
5
|
-
|
5
|
+
module DataFrame
|
6
|
+
|
7
|
+
# Add a scalar or another DataFrame
|
6
8
|
def + other
|
7
|
-
|
9
|
+
binary_operation :+, other
|
8
10
|
end
|
9
11
|
|
10
|
-
|
11
|
-
|
12
|
+
# Subtract a scalar or another DataFrame.
|
13
|
+
def - other
|
14
|
+
binary_operation :-, other
|
12
15
|
end
|
13
16
|
|
17
|
+
# Multiply a scalar or another DataFrame.
|
14
18
|
def * other
|
15
|
-
|
19
|
+
binary_operation :*, other
|
16
20
|
end
|
17
21
|
|
22
|
+
# Divide a scalar or another DataFrame.
|
18
23
|
def / other
|
19
|
-
|
24
|
+
binary_operation :/, other
|
20
25
|
end
|
21
26
|
|
27
|
+
# Modulus with a scalar or another DataFrame.
|
22
28
|
def % other
|
23
|
-
|
29
|
+
binary_operation :%, other
|
30
|
+
end
|
31
|
+
|
32
|
+
# Exponent with a scalar or another DataFrame.
|
33
|
+
def ** other
|
34
|
+
binary_operation :**, other
|
35
|
+
end
|
36
|
+
|
37
|
+
# Calculate exponential of all vectors with numeric values.
|
38
|
+
def exp
|
39
|
+
self.dup.map_vectors! { |v| v.exp if v.type == :numeric }
|
40
|
+
end
|
41
|
+
|
42
|
+
def sqrt
|
43
|
+
self.dup.map_vectors! { |v| v.sqrt if v.type == :numeric }
|
44
|
+
end
|
45
|
+
|
46
|
+
def round precision=0
|
47
|
+
self.dup.map_vectors! { |v| v.round(precision) if v.type == :numeric }
|
48
|
+
end
|
49
|
+
private
|
50
|
+
|
51
|
+
def binary_operation operation, other
|
52
|
+
case other
|
53
|
+
when Daru::DataFrame
|
54
|
+
dataframe_binary_operation operation, other
|
55
|
+
else
|
56
|
+
scalar_binary_operation operation, other
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def dataframe_binary_operation operation, other
|
61
|
+
all_vectors = (self.vectors.to_a | other.vectors.to_a).sort
|
62
|
+
all_indexes = (self.index.to_a | other.index.to_a).sort
|
63
|
+
|
64
|
+
hsh = {}
|
65
|
+
all_vectors.each do |vector_name|
|
66
|
+
this = self .has_vector?(vector_name) ? self .vector[vector_name] : nil
|
67
|
+
that = other.has_vector?(vector_name) ? other.vector[vector_name] : nil
|
68
|
+
|
69
|
+
if this and that
|
70
|
+
hsh[vector_name] = this.send(operation, that)
|
71
|
+
else
|
72
|
+
hsh[vector_name] = Daru::Vector.new([], index: all_indexes,
|
73
|
+
name: vector_name)
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
Daru::DataFrame.new(hsh, index: all_indexes, name: @name, dtype: @dtype)
|
78
|
+
end
|
79
|
+
|
80
|
+
def scalar_binary_operation operation, other
|
81
|
+
clone = self.dup
|
82
|
+
clone.map_vectors! do |vector|
|
83
|
+
vector = vector.send(operation, other) if vector.type == :numeric
|
84
|
+
vector
|
85
|
+
end
|
86
|
+
|
87
|
+
clone
|
24
88
|
end
|
25
89
|
end
|
26
90
|
end
|
@@ -27,19 +27,27 @@ module Daru
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def exp
|
30
|
-
|
30
|
+
math_unary_op :exp
|
31
31
|
end
|
32
32
|
|
33
33
|
def sqrt
|
34
|
-
|
34
|
+
math_unary_op :sqrt
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
38
|
-
|
37
|
+
def abs
|
38
|
+
self.dup.map! { |e| e.abs unless e.nil? }
|
39
|
+
end
|
40
|
+
|
41
|
+
def round precision=0
|
42
|
+
self.dup.map! { |e| e.round(precision) unless e.nil? }
|
39
43
|
end
|
40
44
|
|
41
45
|
private
|
42
46
|
|
47
|
+
def math_unary_op operation
|
48
|
+
self.dup.map! { |e| Math.send(operation, e) unless e.nil? }
|
49
|
+
end
|
50
|
+
|
43
51
|
def binary_op operation, other
|
44
52
|
case other
|
45
53
|
when Daru::Vector
|
@@ -50,20 +58,25 @@ module Daru
|
|
50
58
|
end
|
51
59
|
|
52
60
|
def v2o_binary operation, other
|
53
|
-
Daru::Vector.new self.map { |e| e.send(operation, other) },
|
61
|
+
Daru::Vector.new self.map { |e| e.nil? ? nil : e.send(operation, other) },
|
62
|
+
name: @name, index: @index
|
54
63
|
end
|
55
64
|
|
56
65
|
def v2v_binary operation, other
|
57
66
|
common_idxs = []
|
58
67
|
elements = []
|
68
|
+
index = (@index.to_a + other.index.to_a).uniq.sort
|
59
69
|
|
60
|
-
|
70
|
+
index.each do |idx|
|
61
71
|
this = self[idx]
|
62
72
|
that = other[idx]
|
63
73
|
|
64
74
|
if this and that
|
65
75
|
elements << this.send(operation ,that)
|
66
76
|
common_idxs << idx
|
77
|
+
else
|
78
|
+
elements << nil
|
79
|
+
common_idxs << idx
|
67
80
|
end
|
68
81
|
end
|
69
82
|
|
@@ -2,8 +2,109 @@ module Daru
|
|
2
2
|
module Maths
|
3
3
|
module Statistics
|
4
4
|
module DataFrame
|
5
|
-
|
6
|
-
|
5
|
+
# Calculate mean of numeric vectors.
|
6
|
+
def mean
|
7
|
+
compute_stats :mean
|
8
|
+
end
|
9
|
+
|
10
|
+
# Calculate sample standard deviation of numeric vectors.
|
11
|
+
def std
|
12
|
+
compute_stats :std
|
13
|
+
end
|
14
|
+
|
15
|
+
# Calculate sum of numeric vectors
|
16
|
+
def sum
|
17
|
+
compute_stats :sum
|
18
|
+
end
|
19
|
+
|
20
|
+
# Count the number of non-nil values in each vector.
|
21
|
+
def count
|
22
|
+
compute_stats :count
|
23
|
+
end
|
24
|
+
|
25
|
+
# Calculate the maximum value of each numeric vector.
|
26
|
+
def max
|
27
|
+
compute_stats :max
|
28
|
+
end
|
29
|
+
|
30
|
+
# Calculate the minimum value of each numeric vector.
|
31
|
+
def min
|
32
|
+
compute_stats :min
|
33
|
+
end
|
34
|
+
|
35
|
+
# Compute the product of each numeric vector.
|
36
|
+
def product
|
37
|
+
compute_stats :product
|
38
|
+
end
|
39
|
+
|
40
|
+
# Create a summary of mean, standard deviation, count, max and min of
|
41
|
+
# each numeric vector in the dataframe in one shot.
|
42
|
+
#
|
43
|
+
# == Arguments
|
44
|
+
#
|
45
|
+
# +methods+ - An array with aggregation methods specified as symbols to
|
46
|
+
# be applied to numeric vectors. Default is [:count, :mean, :std, :min,
|
47
|
+
# :max]. Methods will be applied in the specified order.
|
48
|
+
def describe methods=nil
|
49
|
+
methods ||= [:count, :mean, :std, :min, :max]
|
50
|
+
|
51
|
+
description_hash = {}
|
52
|
+
numeric_vectors.each do |vec|
|
53
|
+
description_hash[vec] = methods.map { |m| self[vec].send(m) }
|
54
|
+
end
|
55
|
+
Daru::DataFrame.new(description_hash, index: methods)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Calculate variance-covariance between the numeric vectors.
|
59
|
+
#
|
60
|
+
# == Arguments
|
61
|
+
#
|
62
|
+
# +for_sample_data+ - If set to false, will calculate the population
|
63
|
+
# covariance (denominator N), otherwise calculates the sample covariance
|
64
|
+
# matrix. Defaults to true.
|
65
|
+
def covariance for_sample_data=true
|
66
|
+
cov_arry =
|
67
|
+
if defined? NMatrix and NMatrix.respond_to?(:cov)
|
68
|
+
to_nmatrix.cov(for_sample_data).to_a
|
69
|
+
else
|
70
|
+
df_as_matrix = to_matrix
|
71
|
+
denominator = for_sample_data ? rows - 1 : rows
|
72
|
+
ones = Matrix.column_vector [1]*rows
|
73
|
+
deviation_scores = df_as_matrix - (ones * ones.transpose * df_as_matrix) / rows
|
74
|
+
((deviation_scores.transpose * deviation_scores) / denominator).to_a
|
75
|
+
end
|
76
|
+
|
77
|
+
Daru::DataFrame.rows(cov_arry, index: numeric_vectors, order: numeric_vectors)
|
78
|
+
end
|
79
|
+
|
80
|
+
alias :cov :covariance
|
81
|
+
|
82
|
+
# Calculate the correlation between the numeric vectors.
|
83
|
+
def correlation
|
84
|
+
corr_arry =
|
85
|
+
if defined? NMatrix and NMatrix.respond_to?(:corr)
|
86
|
+
to_nmatrix.corr.to_a
|
87
|
+
else
|
88
|
+
standard_deviation = std.to_matrix
|
89
|
+
(cov.to_matrix.elementwise_division(standard_deviation.transpose *
|
90
|
+
standard_deviation)).to_a
|
91
|
+
end
|
92
|
+
|
93
|
+
Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors)
|
94
|
+
end
|
95
|
+
|
96
|
+
alias :corr :correlation
|
97
|
+
|
98
|
+
private
|
99
|
+
|
100
|
+
def compute_stats method
|
101
|
+
Daru::Vector.new(
|
102
|
+
numeric_vectors.inject({}) do |hash, vec|
|
103
|
+
hash[vec] = self[vec].send(method)
|
104
|
+
hash
|
105
|
+
end
|
106
|
+
)
|
107
|
+
end
|
7
108
|
end
|
8
109
|
end
|
9
110
|
end
|
@@ -1,47 +1,59 @@
|
|
1
1
|
module Daru
|
2
2
|
module Maths
|
3
|
+
# Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
|
4
|
+
# is done inside the wrapper, so that native methods can be used for most of
|
5
|
+
# the computationally intensive tasks.
|
3
6
|
module Statistics
|
4
7
|
module Vector
|
5
|
-
|
6
8
|
def mean
|
7
|
-
@
|
9
|
+
@data.mean
|
8
10
|
end
|
9
11
|
|
10
|
-
def
|
11
|
-
@
|
12
|
+
def sum
|
13
|
+
@data.sum
|
12
14
|
end
|
13
15
|
|
14
|
-
def
|
15
|
-
@
|
16
|
+
def product
|
17
|
+
@data.product
|
16
18
|
end
|
17
19
|
|
18
|
-
def
|
19
|
-
@
|
20
|
+
def min
|
21
|
+
@data.min
|
20
22
|
end
|
21
23
|
|
22
|
-
def
|
23
|
-
|
24
|
+
def range
|
25
|
+
max - min
|
26
|
+
end
|
27
|
+
|
28
|
+
def median
|
29
|
+
percentile 50
|
30
|
+
end
|
31
|
+
|
32
|
+
def mode
|
33
|
+
freqs = frequencies.values
|
34
|
+
@data[freqs.index(freqs.max)]
|
24
35
|
end
|
25
36
|
|
26
37
|
def median_absolute_deviation
|
27
|
-
|
38
|
+
m = median
|
39
|
+
map {|val| (val - m).abs }.median
|
28
40
|
end
|
29
41
|
|
30
42
|
def standard_error
|
31
|
-
@
|
43
|
+
standard_deviation_sample/(Math::sqrt((@size - @nil_positions.size)))
|
32
44
|
end
|
33
45
|
|
34
46
|
def sum_of_squared_deviation
|
35
|
-
@
|
47
|
+
(@data.to_a.inject(0) { |a,x| x.square + a } - (sum.square.quo((@size - @nil_positions.size)))).to_f
|
36
48
|
end
|
37
49
|
|
38
50
|
# Maximum element of the vector.
|
39
51
|
#
|
40
52
|
# @param return_type [Symbol] Data type of the returned value. Defaults
|
41
|
-
#
|
42
|
-
#
|
53
|
+
# to returning only the maximum number but passing *:vector* will return
|
54
|
+
# a Daru::Vector with the index of the corresponding maximum value.
|
43
55
|
def max return_type=:stored_type
|
44
|
-
max_value = @
|
56
|
+
max_value = @data.max
|
45
57
|
if return_type == :vector
|
46
58
|
Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
|
47
59
|
else
|
@@ -49,98 +61,127 @@ module Daru
|
|
49
61
|
end
|
50
62
|
end
|
51
63
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
def has_missing_data?
|
57
|
-
@vector.has_missing_data?
|
58
|
-
end
|
59
|
-
|
60
|
-
def range
|
61
|
-
@vector.range
|
64
|
+
# Return a Vector with the max element and its index.
|
65
|
+
# @return [Daru::Vector]
|
66
|
+
def max_index
|
67
|
+
max :vector
|
62
68
|
end
|
63
69
|
|
64
70
|
def frequencies
|
65
|
-
@
|
71
|
+
@data.inject({}) do |hash, element|
|
72
|
+
hash[element] ||= 0
|
73
|
+
hash[element] += 1
|
74
|
+
hash
|
75
|
+
end
|
66
76
|
end
|
67
77
|
|
68
78
|
def proportions
|
69
|
-
|
79
|
+
len = n_valid
|
80
|
+
frequencies.inject({}) { |hash, arr| hash[arr[0]] = arr[1] / len; hash }
|
70
81
|
end
|
71
82
|
|
72
83
|
def ranked
|
73
|
-
|
84
|
+
sum = 0
|
85
|
+
r = frequencies.sort.inject( {} ) do |memo, val|
|
86
|
+
memo[val[0]] = ((sum + 1) + (sum + val[1])) / 2
|
87
|
+
sum += val[1]
|
88
|
+
memo
|
89
|
+
end
|
90
|
+
|
91
|
+
Daru::Vector.new @data.map { |e| r[e] }, index: self.index,
|
92
|
+
name: self.name, dtype: self.dtype, nm_dtype: self.nm_dtype
|
74
93
|
end
|
75
94
|
|
76
95
|
def coefficient_of_variation
|
77
|
-
|
96
|
+
standard_deviation_sample / mean
|
78
97
|
end
|
79
98
|
|
80
|
-
# Retrieves number of cases which comply condition.
|
81
|
-
#
|
82
|
-
#
|
83
|
-
#
|
84
|
-
# this value.
|
99
|
+
# Retrieves the number of cases that satisfy a condition. If a block is given,
|
100
|
+
# retrieves number of instances where block returns true. If other
|
101
|
+
# values given, retrieves the frequency for this value. If no value
|
102
|
+
# given, counts the number of non-nil elements in the Vector.
|
85
103
|
def count value=false
|
86
|
-
|
104
|
+
if block_given?
|
105
|
+
@data.inject(0){ |memo, val| memo += 1 if yield val; memo}
|
106
|
+
elsif value
|
107
|
+
val = frequencies[value]
|
108
|
+
val.nil? ? 0 : val
|
109
|
+
else
|
110
|
+
size - @nil_positions.size
|
111
|
+
end
|
87
112
|
end
|
88
113
|
|
89
114
|
def proportion value=1
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
# Population variance with denominator (N)
|
94
|
-
def variance_population m=nil
|
95
|
-
@vector.variance_population m
|
115
|
+
frequencies[value] / n_valid
|
96
116
|
end
|
97
117
|
|
98
118
|
# Sample variance with denominator (N-1)
|
99
119
|
def variance_sample m=nil
|
100
|
-
|
120
|
+
m ||= self.mean
|
121
|
+
sum_of_squares(m).quo((@size - @nil_positions.size) - 1)
|
101
122
|
end
|
102
123
|
|
103
|
-
|
104
|
-
|
124
|
+
# Population variance with denominator (N)
|
125
|
+
def variance_population m=nil
|
126
|
+
m ||= mean
|
127
|
+
sum_of_squares(m).quo((@size - @nil_positions.size)).to_f
|
105
128
|
end
|
106
129
|
|
107
|
-
def
|
108
|
-
|
130
|
+
def sum_of_squares(m=nil)
|
131
|
+
m ||= mean
|
132
|
+
@data.inject(0) { |memo, val| memo + (val - m)**2 }
|
109
133
|
end
|
110
134
|
|
111
135
|
def standard_deviation_population m=nil
|
112
|
-
|
136
|
+
m ||= mean
|
137
|
+
Math::sqrt(variance_population(m))
|
138
|
+
end
|
139
|
+
|
140
|
+
def standard_deviation_sample m=nil
|
141
|
+
Math::sqrt(variance_sample(m))
|
113
142
|
end
|
114
143
|
|
115
144
|
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
116
145
|
def skew m=nil
|
117
|
-
|
146
|
+
m ||= mean
|
147
|
+
th = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
|
148
|
+
th.quo ((@size - @nil_positions.size) * (standard_deviation_sample(m)**3))
|
118
149
|
end
|
119
150
|
|
120
151
|
def kurtosis m=nil
|
121
|
-
|
152
|
+
m ||= mean
|
153
|
+
fo = @data.inject(0){ |a, x| a + ((x - m) ** 4) }
|
154
|
+
fo.quo((@size - @nil_positions.size) * standard_deviation_sample(m) ** 4) - 3
|
122
155
|
end
|
123
156
|
|
124
157
|
def average_deviation_population m=nil
|
125
|
-
|
158
|
+
m ||= mean
|
159
|
+
(@data.inject(0) {|memo, val| val + (val - m).abs }) / n_valid
|
126
160
|
end
|
127
161
|
|
128
162
|
def recode!(&block)
|
129
|
-
@
|
163
|
+
@data.recode!(&block)
|
130
164
|
end
|
131
165
|
|
132
166
|
def percentile percent
|
133
|
-
@
|
167
|
+
sorted = @data.sort
|
168
|
+
v = (n_valid * percent).quo(100)
|
169
|
+
if v.to_i != v
|
170
|
+
sorted[v.round]
|
171
|
+
else
|
172
|
+
(sorted[(v - 0.5).round].to_f + sorted[(v + 0.5).round]).quo(2)
|
173
|
+
end
|
134
174
|
end
|
135
175
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
176
|
+
alias :sdp :standard_deviation_population
|
177
|
+
alias :sds :standard_deviation_sample
|
178
|
+
alias :std :sds
|
179
|
+
alias :adp :average_deviation_population
|
180
|
+
alias :cov :coefficient_of_variation
|
181
|
+
alias :variance :variance_sample
|
182
|
+
alias :sd :standard_deviation_sample
|
183
|
+
alias :ss :sum_of_squares
|
184
|
+
alias :percentil :percentile
|
144
185
|
end
|
145
186
|
end
|
146
187
|
end
|