daru 0.0.3.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/History.txt +16 -0
- data/README.md +83 -23
- data/daru.gemspec +7 -0
- data/lib/daru/accessors/array_wrapper.rb +248 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +252 -0
- data/lib/daru/dataframe.rb +171 -72
- data/lib/daru/index.rb +29 -5
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/{math → maths}/arithmetic/dataframe.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +75 -0
- data/lib/daru/{math → maths}/statistics/dataframe.rb +1 -1
- data/lib/daru/maths/statistics/vector.rb +147 -0
- data/lib/daru/monkeys.rb +16 -10
- data/lib/daru/plotting/dataframe.rb +47 -0
- data/lib/daru/plotting/vector.rb +41 -0
- data/lib/daru/vector.rb +166 -40
- data/lib/version.rb +1 -1
- data/notebooks/intro_with_music_data_.ipynb +318 -0
- data/spec/dataframe_spec.rb +528 -472
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/index_spec.rb +8 -0
- data/spec/io/io_spec.rb +1 -0
- data/spec/math/statistics/vector_spec.rb +144 -3
- data/spec/vector_spec.rb +165 -148
- metadata +32 -6
- data/lib/daru/math/arithmetic/vector.rb +0 -71
- data/lib/daru/math/statistics/vector.rb +0 -9
data/lib/daru/index.rb
CHANGED
@@ -14,17 +14,27 @@ module Daru
|
|
14
14
|
|
15
15
|
attr_reader :index_class
|
16
16
|
|
17
|
-
def initialize index
|
17
|
+
def initialize index, values=nil
|
18
18
|
@relation_hash = {}
|
19
19
|
|
20
20
|
index = 0 if index.nil?
|
21
21
|
index = Array.new(index) { |i| i} if index.is_a? Integer
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
if values.nil?
|
24
|
+
index.each_with_index do |n, idx|
|
25
|
+
n = n.to_sym unless n.is_a?(Integer)
|
25
26
|
|
26
|
-
|
27
|
+
@relation_hash[n] = idx
|
28
|
+
end
|
29
|
+
else
|
30
|
+
raise IndexError, "Size of values : #{values.size} and index : #{index.size} do not match" if
|
31
|
+
index.size != values.size
|
32
|
+
|
33
|
+
values.each_with_index do |value,i|
|
34
|
+
@relation_hash[index[i]] = value
|
35
|
+
end
|
27
36
|
end
|
37
|
+
|
28
38
|
@relation_hash.freeze
|
29
39
|
|
30
40
|
@size = @relation_hash.size
|
@@ -43,7 +53,21 @@ module Daru
|
|
43
53
|
end
|
44
54
|
|
45
55
|
def [](key)
|
46
|
-
|
56
|
+
case key
|
57
|
+
when Range
|
58
|
+
first = @relation_hash[key.first]
|
59
|
+
last = @relation_hash[key.last]
|
60
|
+
|
61
|
+
indexes = []
|
62
|
+
|
63
|
+
(first..last).each do |idx|
|
64
|
+
indexes << @relation_hash.key(idx)
|
65
|
+
end
|
66
|
+
|
67
|
+
Daru::Index.new indexes, (first..last).to_a
|
68
|
+
else
|
69
|
+
@relation_hash[key]
|
70
|
+
end
|
47
71
|
end
|
48
72
|
|
49
73
|
def +(other)
|
data/lib/daru/io/io.rb
CHANGED
data/lib/daru/maths/arithmetic/vector.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module Daru
|
2
|
+
module Maths
|
3
|
+
module Arithmetic
|
4
|
+
module Vector
|
5
|
+
def + other
|
6
|
+
binary_op :+, other
|
7
|
+
end
|
8
|
+
|
9
|
+
def - other
|
10
|
+
binary_op :-, other
|
11
|
+
end
|
12
|
+
|
13
|
+
def * other
|
14
|
+
binary_op :*, other
|
15
|
+
end
|
16
|
+
|
17
|
+
def / other
|
18
|
+
binary_op :/, other
|
19
|
+
end
|
20
|
+
|
21
|
+
def % other
|
22
|
+
binary_op :%, other
|
23
|
+
end
|
24
|
+
|
25
|
+
def ** other
|
26
|
+
binary_op :**, other
|
27
|
+
end
|
28
|
+
|
29
|
+
def exp
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
def sqrt
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
def round
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def binary_op operation, other
|
44
|
+
case other
|
45
|
+
when Daru::Vector
|
46
|
+
v2v_binary operation, other
|
47
|
+
else
|
48
|
+
v2o_binary operation, other
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def v2o_binary operation, other
|
53
|
+
Daru::Vector.new self.map { |e| e.send(operation, other) }, name: @name, index: @index
|
54
|
+
end
|
55
|
+
|
56
|
+
def v2v_binary operation, other
|
57
|
+
common_idxs = []
|
58
|
+
elements = []
|
59
|
+
|
60
|
+
@index.each do |idx|
|
61
|
+
this = self[idx]
|
62
|
+
that = other[idx]
|
63
|
+
|
64
|
+
if this and that
|
65
|
+
elements << this.send(operation ,that)
|
66
|
+
common_idxs << idx
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
Daru::Vector.new(elements, name: @name, index: common_idxs)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/daru/maths/statistics/vector.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
module Daru
|
2
|
+
module Maths
|
3
|
+
module Statistics
|
4
|
+
module Vector
|
5
|
+
|
6
|
+
def mean
|
7
|
+
@vector.mean
|
8
|
+
end
|
9
|
+
|
10
|
+
def median
|
11
|
+
@vector.median
|
12
|
+
end
|
13
|
+
|
14
|
+
def mode
|
15
|
+
@vector.mode
|
16
|
+
end
|
17
|
+
|
18
|
+
def sum
|
19
|
+
@vector.sum
|
20
|
+
end
|
21
|
+
|
22
|
+
def product
|
23
|
+
@vector.product
|
24
|
+
end
|
25
|
+
|
26
|
+
def median_absolute_deviation
|
27
|
+
@vector.median_absolute_deviation
|
28
|
+
end
|
29
|
+
|
30
|
+
def standard_error
|
31
|
+
@vector.standard_error
|
32
|
+
end
|
33
|
+
|
34
|
+
def sum_of_squared_deviation
|
35
|
+
@vector.sum_of_squared_deviation
|
36
|
+
end
|
37
|
+
|
38
|
+
# Maximum element of the vector.
|
39
|
+
#
|
40
|
+
# @param return_type [Symbol] Data type of the returned value. Defaults
|
41
|
+
# to returning only the maximum number but passing *:vector* will return
|
42
|
+
# a Daru::Vector with the index of the corresponding maximum value.
|
43
|
+
def max return_type=:stored_type
|
44
|
+
max_value = @vector.max
|
45
|
+
if return_type == :vector
|
46
|
+
Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
|
47
|
+
else
|
48
|
+
max_value
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def min
|
53
|
+
@vector.min
|
54
|
+
end
|
55
|
+
|
56
|
+
def has_missing_data?
|
57
|
+
@vector.has_missing_data?
|
58
|
+
end
|
59
|
+
|
60
|
+
def range
|
61
|
+
@vector.range
|
62
|
+
end
|
63
|
+
|
64
|
+
def frequencies
|
65
|
+
@vector.frequencies
|
66
|
+
end
|
67
|
+
|
68
|
+
def proportions
|
69
|
+
@vector.proportions
|
70
|
+
end
|
71
|
+
|
72
|
+
def ranked
|
73
|
+
@vector.ranked
|
74
|
+
end
|
75
|
+
|
76
|
+
def coefficient_of_variation
|
77
|
+
@vector.coefficient_of_variation
|
78
|
+
end
|
79
|
+
|
80
|
+
# Retrieves number of cases which comply condition.
|
81
|
+
# If block given, retrieves number of instances where
|
82
|
+
# block returns true.
|
83
|
+
# If other values given, retrieves the frequency for
|
84
|
+
# this value.
|
85
|
+
def count value=false
|
86
|
+
@vector.count value
|
87
|
+
end
|
88
|
+
|
89
|
+
def proportion value=1
|
90
|
+
@vector.proportion value
|
91
|
+
end
|
92
|
+
|
93
|
+
# Population variance with denominator (N)
|
94
|
+
def variance_population m=nil
|
95
|
+
@vector.variance_population m
|
96
|
+
end
|
97
|
+
|
98
|
+
# Sample variance with denominator (N-1)
|
99
|
+
def variance_sample m=nil
|
100
|
+
@vector.variance_sample m
|
101
|
+
end
|
102
|
+
|
103
|
+
def sum_of_squares m=nil
|
104
|
+
@vector.sum_of_squares m
|
105
|
+
end
|
106
|
+
|
107
|
+
def standard_deviation_sample m=nil
|
108
|
+
@vector.standard_deviation_sample m
|
109
|
+
end
|
110
|
+
|
111
|
+
def standard_deviation_population m=nil
|
112
|
+
@vector.standard_deviation_population m
|
113
|
+
end
|
114
|
+
|
115
|
+
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
116
|
+
def skew m=nil
|
117
|
+
@vector.skew m
|
118
|
+
end
|
119
|
+
|
120
|
+
def kurtosis m=nil
|
121
|
+
@vector.kurtosis m
|
122
|
+
end
|
123
|
+
|
124
|
+
def average_deviation_population m=nil
|
125
|
+
@vector.average_deviation_population m
|
126
|
+
end
|
127
|
+
|
128
|
+
def recode!(&block)
|
129
|
+
@vector.recode!(&block)
|
130
|
+
end
|
131
|
+
|
132
|
+
def percentile percent
|
133
|
+
@vector.percentile percent
|
134
|
+
end
|
135
|
+
|
136
|
+
alias_method :sdp, :standard_deviation_population
|
137
|
+
alias_method :sds, :standard_deviation_sample
|
138
|
+
alias_method :adp, :average_deviation_population
|
139
|
+
# alias_method :cov, :coefficient_of_variation
|
140
|
+
# alias_method :variance, :variance_sample
|
141
|
+
alias_method :sd, :standard_deviation_sample
|
142
|
+
alias_method :ss, :sum_of_squares
|
143
|
+
alias_method :percentil, :percentile
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class Array
|
2
|
-
def daru_vector name=nil, index=nil
|
3
|
-
Daru::Vector.new self, name: name, index: index
|
2
|
+
def daru_vector name=nil, index=nil, dtype=Array
|
3
|
+
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
4
4
|
end
|
5
5
|
|
6
6
|
alias_method :dv, :daru_vector
|
@@ -11,8 +11,8 @@ class Array
|
|
11
11
|
end
|
12
12
|
|
13
13
|
class Range
|
14
|
-
def daru_vector name=nil, index=nil
|
15
|
-
Daru::Vector.new self, name: name, index: index
|
14
|
+
def daru_vector name=nil, index=nil, dtype=Array
|
15
|
+
Daru::Vector.new self, name: name, index: index, dtype: Array
|
16
16
|
end
|
17
17
|
|
18
18
|
alias_method :dv, :daru_vector
|
@@ -23,25 +23,31 @@ class Range
|
|
23
23
|
end
|
24
24
|
|
25
25
|
class Hash
|
26
|
-
def daru_vector index=nil
|
27
|
-
Daru::Vector.new self.values[0], name: self.keys[0], index: index
|
26
|
+
def daru_vector index=nil, dtype=Array
|
27
|
+
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype: Array
|
28
28
|
end
|
29
29
|
|
30
30
|
alias_method :dv, :daru_vector
|
31
31
|
end
|
32
32
|
|
33
33
|
class NMatrix
|
34
|
-
def daru_vector name=nil, index=nil
|
35
|
-
Daru::Vector.new self, name: name, index: index
|
34
|
+
def daru_vector name=nil, index=nil, dtype=NMatrix
|
35
|
+
Daru::Vector.new self, name: name, index: index, dtype: NMatrix
|
36
36
|
end
|
37
37
|
|
38
38
|
alias_method :dv, :daru_vector
|
39
39
|
end
|
40
40
|
|
41
41
|
class MDArray
|
42
|
-
def daru_vector name=nil, index=nil
|
43
|
-
Daru::Vector.new self, name: name, index: index
|
42
|
+
def daru_vector name=nil, index=nil, dtype=MDArray
|
43
|
+
Daru::Vector.new self, name: name, index: index, dtype: MDArray
|
44
44
|
end
|
45
45
|
|
46
46
|
alias_method :dv, :daru_vector
|
47
|
+
end
|
48
|
+
|
49
|
+
class Numeric
|
50
|
+
def square
|
51
|
+
self * self
|
52
|
+
end
|
47
53
|
end
|
data/lib/daru/plotting/dataframe.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
begin
|
2
|
+
require 'nyaplot'
|
3
|
+
rescue LoadError => e
|
4
|
+
puts "#{e}"
|
5
|
+
end
|
6
|
+
|
7
|
+
module Daru
|
8
|
+
module Plotting
|
9
|
+
module DataFrame
|
10
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options.
|
11
|
+
# == Arguments
|
12
|
+
# +x+ - Vector name to be used for x-axis
|
13
|
+
# +y+ - Vector name to be used for y-axis
|
14
|
+
# == Options
|
15
|
+
# type - Type of plot (scatter, bar, histogram)
|
16
|
+
# title - Title of plot
|
17
|
+
# x_label - X - label
|
18
|
+
# y_label - Y - label
|
19
|
+
# tooltip_contents - Contents of the tooltip. Array of vector names
|
20
|
+
# fill_by - Vector name by which each plotted element is colored
|
21
|
+
# shape_by- Vector name by which dots in a scatter plot are shaped
|
22
|
+
# == Usage
|
23
|
+
# df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
|
24
|
+
# df.plot :a, :b, type: :bar, title: "Awesome plot"
|
25
|
+
def plot x, y, opts={}
|
26
|
+
options = {
|
27
|
+
type: :scatter,
|
28
|
+
title: "#{@name}",
|
29
|
+
}.merge(opts)
|
30
|
+
|
31
|
+
plot = Nyaplot::Plot.new
|
32
|
+
p = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]), options[:type], x, y)
|
33
|
+
plot.x_label options[:x_label] if options[:x_label]
|
34
|
+
plot.y_label options[:y_label] if options[:y_label]
|
35
|
+
p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
|
36
|
+
|
37
|
+
if options[:fill_by] or options[:shape_by]
|
38
|
+
p.color Nyaplot::Colors.qual
|
39
|
+
p.fill_by options[:fill_by] if options[:fill_by]
|
40
|
+
p.shape_by options[:shape_by] if options[:shape_by]
|
41
|
+
end
|
42
|
+
|
43
|
+
plot.show
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/daru/plotting/vector.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
begin
|
2
|
+
require 'nyaplot'
|
3
|
+
rescue LoadError => e
|
4
|
+
puts "#{e}"
|
5
|
+
end
|
6
|
+
|
7
|
+
module Daru
|
8
|
+
module Plotting
|
9
|
+
module Vector
|
10
|
+
|
11
|
+
# Plots a Vector with Nyaplot on IRuby using the given options.
|
12
|
+
# == Options
|
13
|
+
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
14
|
+
#
|
15
|
+
# == Usage
|
16
|
+
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
17
|
+
# vector.plot type: :bar, title: "My first plot", color: true
|
18
|
+
def plot opts={}
|
19
|
+
options = {
|
20
|
+
type: :scatter,
|
21
|
+
title: "#{@name}",
|
22
|
+
x_label: '',
|
23
|
+
y_label: '',
|
24
|
+
color: false
|
25
|
+
}.merge(opts)
|
26
|
+
|
27
|
+
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
28
|
+
plot = Nyaplot::Plot.new
|
29
|
+
plot.width(options[:width]) if options[:width]
|
30
|
+
plot.height(options[:height]) if options[:height]
|
31
|
+
|
32
|
+
p = plot.add( options[:type], x_axis, @vector.to_a )
|
33
|
+
plot.x_label( options[:x_label] ) if options[:x_label]
|
34
|
+
plot.y_label( options[:y_label] ) if options[:y_label]
|
35
|
+
p.color( Nyaplot::Colors.qual ) if options[:color]
|
36
|
+
|
37
|
+
plot.show
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|