daru 0.0.3.1 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/History.txt +16 -0
- data/README.md +83 -23
- data/daru.gemspec +7 -0
- data/lib/daru/accessors/array_wrapper.rb +248 -0
- data/lib/daru/accessors/nmatrix_wrapper.rb +252 -0
- data/lib/daru/dataframe.rb +171 -72
- data/lib/daru/index.rb +29 -5
- data/lib/daru/io/io.rb +1 -1
- data/lib/daru/{math → maths}/arithmetic/dataframe.rb +1 -1
- data/lib/daru/maths/arithmetic/vector.rb +75 -0
- data/lib/daru/{math → maths}/statistics/dataframe.rb +1 -1
- data/lib/daru/maths/statistics/vector.rb +147 -0
- data/lib/daru/monkeys.rb +16 -10
- data/lib/daru/plotting/dataframe.rb +47 -0
- data/lib/daru/plotting/vector.rb +41 -0
- data/lib/daru/vector.rb +166 -40
- data/lib/version.rb +1 -1
- data/notebooks/intro_with_music_data_.ipynb +318 -0
- data/spec/dataframe_spec.rb +528 -472
- data/spec/fixtures/music_data.tsv +2501 -0
- data/spec/index_spec.rb +8 -0
- data/spec/io/io_spec.rb +1 -0
- data/spec/math/statistics/vector_spec.rb +144 -3
- data/spec/vector_spec.rb +165 -148
- metadata +32 -6
- data/lib/daru/math/arithmetic/vector.rb +0 -71
- data/lib/daru/math/statistics/vector.rb +0 -9
data/lib/daru/index.rb
CHANGED
@@ -14,17 +14,27 @@ module Daru
|
|
14
14
|
|
15
15
|
attr_reader :index_class
|
16
16
|
|
17
|
-
def initialize index
|
17
|
+
def initialize index, values=nil
|
18
18
|
@relation_hash = {}
|
19
19
|
|
20
20
|
index = 0 if index.nil?
|
21
21
|
index = Array.new(index) { |i| i} if index.is_a? Integer
|
22
22
|
|
23
|
-
|
24
|
-
|
23
|
+
if values.nil?
|
24
|
+
index.each_with_index do |n, idx|
|
25
|
+
n = n.to_sym unless n.is_a?(Integer)
|
25
26
|
|
26
|
-
|
27
|
+
@relation_hash[n] = idx
|
28
|
+
end
|
29
|
+
else
|
30
|
+
raise IndexError, "Size of values : #{values.size} and index : #{index.size} do not match" if
|
31
|
+
index.size != values.size
|
32
|
+
|
33
|
+
values.each_with_index do |value,i|
|
34
|
+
@relation_hash[index[i]] = value
|
35
|
+
end
|
27
36
|
end
|
37
|
+
|
28
38
|
@relation_hash.freeze
|
29
39
|
|
30
40
|
@size = @relation_hash.size
|
@@ -43,7 +53,21 @@ module Daru
|
|
43
53
|
end
|
44
54
|
|
45
55
|
def [](key)
|
46
|
-
|
56
|
+
case key
|
57
|
+
when Range
|
58
|
+
first = @relation_hash[key.first]
|
59
|
+
last = @relation_hash[key.last]
|
60
|
+
|
61
|
+
indexes = []
|
62
|
+
|
63
|
+
(first..last).each do |idx|
|
64
|
+
indexes << @relation_hash.key(idx)
|
65
|
+
end
|
66
|
+
|
67
|
+
Daru::Index.new indexes, (first..last).to_a
|
68
|
+
else
|
69
|
+
@relation_hash[key]
|
70
|
+
end
|
47
71
|
end
|
48
72
|
|
49
73
|
def +(other)
|
data/lib/daru/io/io.rb
CHANGED
data/lib/daru/maths/arithmetic/vector.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
module Daru
|
2
|
+
module Maths
|
3
|
+
module Arithmetic
|
4
|
+
module Vector
|
5
|
+
def + other
|
6
|
+
binary_op :+, other
|
7
|
+
end
|
8
|
+
|
9
|
+
def - other
|
10
|
+
binary_op :-, other
|
11
|
+
end
|
12
|
+
|
13
|
+
def * other
|
14
|
+
binary_op :*, other
|
15
|
+
end
|
16
|
+
|
17
|
+
def / other
|
18
|
+
binary_op :/, other
|
19
|
+
end
|
20
|
+
|
21
|
+
def % other
|
22
|
+
binary_op :%, other
|
23
|
+
end
|
24
|
+
|
25
|
+
def ** other
|
26
|
+
binary_op :**, other
|
27
|
+
end
|
28
|
+
|
29
|
+
def exp
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
def sqrt
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
def round
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def binary_op operation, other
|
44
|
+
case other
|
45
|
+
when Daru::Vector
|
46
|
+
v2v_binary operation, other
|
47
|
+
else
|
48
|
+
v2o_binary operation, other
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def v2o_binary operation, other
|
53
|
+
Daru::Vector.new self.map { |e| e.send(operation, other) }, name: @name, index: @index
|
54
|
+
end
|
55
|
+
|
56
|
+
def v2v_binary operation, other
|
57
|
+
common_idxs = []
|
58
|
+
elements = []
|
59
|
+
|
60
|
+
@index.each do |idx|
|
61
|
+
this = self[idx]
|
62
|
+
that = other[idx]
|
63
|
+
|
64
|
+
if this and that
|
65
|
+
elements << this.send(operation ,that)
|
66
|
+
common_idxs << idx
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
Daru::Vector.new(elements, name: @name, index: common_idxs)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/daru/maths/statistics/vector.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
module Daru
|
2
|
+
module Maths
|
3
|
+
module Statistics
|
4
|
+
module Vector
|
5
|
+
|
6
|
+
def mean
|
7
|
+
@vector.mean
|
8
|
+
end
|
9
|
+
|
10
|
+
def median
|
11
|
+
@vector.median
|
12
|
+
end
|
13
|
+
|
14
|
+
def mode
|
15
|
+
@vector.mode
|
16
|
+
end
|
17
|
+
|
18
|
+
def sum
|
19
|
+
@vector.sum
|
20
|
+
end
|
21
|
+
|
22
|
+
def product
|
23
|
+
@vector.product
|
24
|
+
end
|
25
|
+
|
26
|
+
def median_absolute_deviation
|
27
|
+
@vector.median_absolute_deviation
|
28
|
+
end
|
29
|
+
|
30
|
+
def standard_error
|
31
|
+
@vector.standard_error
|
32
|
+
end
|
33
|
+
|
34
|
+
def sum_of_squared_deviation
|
35
|
+
@vector.sum_of_squared_deviation
|
36
|
+
end
|
37
|
+
|
38
|
+
# Maximum element of the vector.
|
39
|
+
#
|
40
|
+
# @param return_type [Symbol] Data type of the returned value. Defaults
|
41
|
+
# to returning only the maximum number but passing *:vector* will return
|
42
|
+
# a Daru::Vector with the index of the corresponding maximum value.
|
43
|
+
def max return_type=:stored_type
|
44
|
+
max_value = @vector.max
|
45
|
+
if return_type == :vector
|
46
|
+
Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
|
47
|
+
else
|
48
|
+
max_value
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
def min
|
53
|
+
@vector.min
|
54
|
+
end
|
55
|
+
|
56
|
+
def has_missing_data?
|
57
|
+
@vector.has_missing_data?
|
58
|
+
end
|
59
|
+
|
60
|
+
def range
|
61
|
+
@vector.range
|
62
|
+
end
|
63
|
+
|
64
|
+
def frequencies
|
65
|
+
@vector.frequencies
|
66
|
+
end
|
67
|
+
|
68
|
+
def proportions
|
69
|
+
@vector.proportions
|
70
|
+
end
|
71
|
+
|
72
|
+
def ranked
|
73
|
+
@vector.ranked
|
74
|
+
end
|
75
|
+
|
76
|
+
def coefficient_of_variation
|
77
|
+
@vector.coefficient_of_variation
|
78
|
+
end
|
79
|
+
|
80
|
+
# Retrieves number of cases which comply condition.
|
81
|
+
# If block given, retrieves number of instances where
|
82
|
+
# block returns true.
|
83
|
+
# If other values given, retrieves the frequency for
|
84
|
+
# this value.
|
85
|
+
def count value=false
|
86
|
+
@vector.count value
|
87
|
+
end
|
88
|
+
|
89
|
+
def proportion value=1
|
90
|
+
@vector.proportion value
|
91
|
+
end
|
92
|
+
|
93
|
+
# Population variance with denominator (N)
|
94
|
+
def variance_population m=nil
|
95
|
+
@vector.variance_population m
|
96
|
+
end
|
97
|
+
|
98
|
+
# Sample variance with denominator (N-1)
|
99
|
+
def variance_sample m=nil
|
100
|
+
@vector.variance_sample m
|
101
|
+
end
|
102
|
+
|
103
|
+
def sum_of_squares m=nil
|
104
|
+
@vector.sum_of_squares m
|
105
|
+
end
|
106
|
+
|
107
|
+
def standard_deviation_sample m=nil
|
108
|
+
@vector.standard_deviation_sample m
|
109
|
+
end
|
110
|
+
|
111
|
+
def standard_deviation_population m=nil
|
112
|
+
@vector.standard_deviation_population m
|
113
|
+
end
|
114
|
+
|
115
|
+
# Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
|
116
|
+
def skew m=nil
|
117
|
+
@vector.skew m
|
118
|
+
end
|
119
|
+
|
120
|
+
def kurtosis m=nil
|
121
|
+
@vector.kurtosis m
|
122
|
+
end
|
123
|
+
|
124
|
+
def average_deviation_population m=nil
|
125
|
+
@vector.average_deviation_population m
|
126
|
+
end
|
127
|
+
|
128
|
+
def recode!(&block)
|
129
|
+
@vector.recode!(&block)
|
130
|
+
end
|
131
|
+
|
132
|
+
def percentile percent
|
133
|
+
@vector.percentile percent
|
134
|
+
end
|
135
|
+
|
136
|
+
alias_method :sdp, :standard_deviation_population
|
137
|
+
alias_method :sds, :standard_deviation_sample
|
138
|
+
alias_method :adp, :average_deviation_population
|
139
|
+
# alias_method :cov, :coefficient_of_variation
|
140
|
+
# alias_method :variance, :variance_sample
|
141
|
+
alias_method :sd, :standard_deviation_sample
|
142
|
+
alias_method :ss, :sum_of_squares
|
143
|
+
alias_method :percentil, :percentile
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
data/lib/daru/monkeys.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class Array
|
2
|
-
def daru_vector name=nil, index=nil
|
3
|
-
Daru::Vector.new self, name: name, index: index
|
2
|
+
def daru_vector name=nil, index=nil, dtype=Array
|
3
|
+
Daru::Vector.new self, name: name, index: index, dtype: dtype
|
4
4
|
end
|
5
5
|
|
6
6
|
alias_method :dv, :daru_vector
|
@@ -11,8 +11,8 @@ class Array
|
|
11
11
|
end
|
12
12
|
|
13
13
|
class Range
|
14
|
-
def daru_vector name=nil, index=nil
|
15
|
-
Daru::Vector.new self, name: name, index: index
|
14
|
+
def daru_vector name=nil, index=nil, dtype=Array
|
15
|
+
Daru::Vector.new self, name: name, index: index, dtype: Array
|
16
16
|
end
|
17
17
|
|
18
18
|
alias_method :dv, :daru_vector
|
@@ -23,25 +23,31 @@ class Range
|
|
23
23
|
end
|
24
24
|
|
25
25
|
class Hash
|
26
|
-
def daru_vector index=nil
|
27
|
-
Daru::Vector.new self.values[0], name: self.keys[0], index: index
|
26
|
+
def daru_vector index=nil, dtype=Array
|
27
|
+
Daru::Vector.new self.values[0], name: self.keys[0], index: index, dtype: Array
|
28
28
|
end
|
29
29
|
|
30
30
|
alias_method :dv, :daru_vector
|
31
31
|
end
|
32
32
|
|
33
33
|
class NMatrix
|
34
|
-
def daru_vector name=nil, index=nil
|
35
|
-
Daru::Vector.new self, name: name, index: index
|
34
|
+
def daru_vector name=nil, index=nil, dtype=NMatrix
|
35
|
+
Daru::Vector.new self, name: name, index: index, dtype: NMatrix
|
36
36
|
end
|
37
37
|
|
38
38
|
alias_method :dv, :daru_vector
|
39
39
|
end
|
40
40
|
|
41
41
|
class MDArray
|
42
|
-
def daru_vector name=nil, index=nil
|
43
|
-
Daru::Vector.new self, name: name, index: index
|
42
|
+
def daru_vector name=nil, index=nil, dtype=MDArray
|
43
|
+
Daru::Vector.new self, name: name, index: index, dtype: MDArray
|
44
44
|
end
|
45
45
|
|
46
46
|
alias_method :dv, :daru_vector
|
47
|
+
end
|
48
|
+
|
49
|
+
class Numeric
|
50
|
+
def square
|
51
|
+
self * self
|
52
|
+
end
|
47
53
|
end
|
data/lib/daru/plotting/dataframe.rb
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
begin
|
2
|
+
require 'nyaplot'
|
3
|
+
rescue LoadError => e
|
4
|
+
puts "#{e}"
|
5
|
+
end
|
6
|
+
|
7
|
+
module Daru
|
8
|
+
module Plotting
|
9
|
+
module DataFrame
|
10
|
+
# Plots a DataFrame with Nyaplot on IRuby using the given options.
|
11
|
+
# == Arguments
|
12
|
+
# +x+ - Vector name to be used for x-axis
|
13
|
+
# +y+ - Vector name to be used for y-axis
|
14
|
+
# == Options
|
15
|
+
# type - Type of plot (scatter, bar, histogram)
|
16
|
+
# title - Title of plot
|
17
|
+
# x_label - X - label
|
18
|
+
# y_label - Y - label
|
19
|
+
# tooltip_contents - Contents of the tooltip. Array of vector names
|
20
|
+
# fill_by - Vector name by which each plotted element is colored
|
21
|
+
# shape_by- Vector name by which dots in a scatter plot are shaped
|
22
|
+
# == Usage
|
23
|
+
# df = Daru::DataFrame.new({a:[0,1,2,3,4], b:[10,20,30,40,50]})
|
24
|
+
# df.plot :a, :b, type: :bar, title: "Awesome plot"
|
25
|
+
def plot x, y, opts={}
|
26
|
+
options = {
|
27
|
+
type: :scatter,
|
28
|
+
title: "#{@name}",
|
29
|
+
}.merge(opts)
|
30
|
+
|
31
|
+
plot = Nyaplot::Plot.new
|
32
|
+
p = plot.add_with_df(Nyaplot::DataFrame.new(self.to_a[0]), options[:type], x, y)
|
33
|
+
plot.x_label options[:x_label] if options[:x_label]
|
34
|
+
plot.y_label options[:y_label] if options[:y_label]
|
35
|
+
p.tooltip_contents options[:tooltip_contents] if options[:tooltip_contents]
|
36
|
+
|
37
|
+
if options[:fill_by] or options[:shape_by]
|
38
|
+
p.color Nyaplot::Colors.qual
|
39
|
+
p.fill_by options[:fill_by] if options[:fill_by]
|
40
|
+
p.shape_by options[:shape_by] if options[:shape_by]
|
41
|
+
end
|
42
|
+
|
43
|
+
plot.show
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
data/lib/daru/plotting/vector.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
begin
|
2
|
+
require 'nyaplot'
|
3
|
+
rescue LoadError => e
|
4
|
+
puts "#{e}"
|
5
|
+
end
|
6
|
+
|
7
|
+
module Daru
|
8
|
+
module Plotting
|
9
|
+
module Vector
|
10
|
+
|
11
|
+
# Plots a Vector with Nyaplot on IRuby using the given options.
|
12
|
+
# == Options
|
13
|
+
# type (:scatter, :bar, :histogram), title, x_label, y_label, color(true/false)
|
14
|
+
#
|
15
|
+
# == Usage
|
16
|
+
# vector = Daru::Vector.new [10,20,30,40], [:one, :two, :three, :four]
|
17
|
+
# vector.plot type: :bar, title: "My first plot", color: true
|
18
|
+
def plot opts={}
|
19
|
+
options = {
|
20
|
+
type: :scatter,
|
21
|
+
title: "#{@name}",
|
22
|
+
x_label: '',
|
23
|
+
y_label: '',
|
24
|
+
color: false
|
25
|
+
}.merge(opts)
|
26
|
+
|
27
|
+
x_axis = options[:type] == :scatter ? Array.new(@size) { |i| i } : @index.to_a
|
28
|
+
plot = Nyaplot::Plot.new
|
29
|
+
plot.width(options[:width]) if options[:width]
|
30
|
+
plot.height(options[:height]) if options[:height]
|
31
|
+
|
32
|
+
p = plot.add( options[:type], x_axis, @vector.to_a )
|
33
|
+
plot.x_label( options[:x_label] ) if options[:x_label]
|
34
|
+
plot.y_label( options[:y_label] ) if options[:y_label]
|
35
|
+
p.color( Nyaplot::Colors.qual ) if options[:color]
|
36
|
+
|
37
|
+
plot.show
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|