mikon 0.1.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +160 -0
- data/Rakefile +1 -0
- data/example/Mikon_Manipuration.ipynb +582 -0
- data/example/Mikon_stats.ipynb +352 -0
- data/example/Plotting.ipynb +503 -0
- data/lib/mikon.rb +9 -0
- data/lib/mikon/core/array.rb +139 -0
- data/lib/mikon/core/dataframe.rb +400 -0
- data/lib/mikon/core/index.rb +30 -0
- data/lib/mikon/core/series.rb +139 -0
- data/lib/mikon/pivot.rb +36 -0
- data/lib/mikon/plot.rb +66 -0
- data/lib/mikon/stats.rb +227 -0
- data/lib/mikon/version.rb +3 -0
- data/mikon.gemspec +26 -0
- data/spec/core/array_spec.rb +0 -0
- data/spec/core/dataframe_spec.rb +200 -0
- data/spec/core/series_spec.rb +0 -0
- data/spec/data/no_header.csv +2 -0
- data/spec/data/test.csv +3 -0
- data/spec/data/test.tsv +3 -0
- data/spec/spec_helper.rb +2 -0
- metadata +147 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Mikon
  # Internal class for indexing.
  #
  # Holds the index labels in a Mikon::DArray and optionally carries a name.
  class Index
    extend Forwardable
    # Element lookup is forwarded to the underlying label storage.
    def_delegators :@data, :[]

    # Name passed through the +:name+ option (nil when none was given).
    attr_reader :name

    # @param source [Array, Mikon::DArray] the index labels; an Array is
    #   wrapped in a new Mikon::DArray, a Mikon::DArray is used as-is
    # @param options [Hash] +:name+ => optional name of the index
    # @raise [ArgumentError] if source is neither an Array nor a Mikon::DArray
    def initialize(source, options={})
      options = { name: nil }.merge(options)

      @data =
        case source
        when Array then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else
          raise ArgumentError, "source should be an Array or a Mikon::DArray, got #{source.class}"
        end

      @name = options[:name]
    end

    # Build a new Index whose labels are ordered by the block's result.
    # Without a block an Enumerator is returned.
    #
    # The name of the receiver is carried over to the sorted index
    # (previously it was silently dropped).
    #
    # @return [Mikon::Index, Enumerator]
    def sort_by(&block)
      return to_enum(:sort_by) unless block_given?

      Mikon::Index.new(@data.sort_by(&block), name: @name)
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Mikon
  # A named, one-dimensional column of values (stored in a Mikon::DArray)
  # paired with an index of labels of the same length.
  class Series
    include Enumerable
    extend Forwardable
    def_delegators :@data, :max, :min
    # Every method of Mikon::Stats is forwarded to the underlying storage.
    def_delegators :@data, *(Mikon::Stats.instance_methods)
    # The reader for +name+ is defined explicitly below (it doubles as a setter).
    attr_reader :index

    # @param name [Object] label of the series
    # @param source [Array, NMatrix, Mikon::DArray] the values
    # @param options [Hash] +:index+ => labels; defaults to 0..length-1
    # @raise [RuntimeError] if source has an unsupported type or the index
    #   length does not match the number of values
    def initialize(name, source, options={})
      options = { index: nil }.merge(options)

      @data =
        case source
        when Array, NMatrix then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else raise "Non-acceptable Arguments Error"
        end

      @index = options[:index]
      @name = name

      _check_if_valid
    end

    # Fill in the default index and verify that it matches the data length.
    def _check_if_valid
      @index = (0..(length-1)).to_a if @index.nil?
      raise "index should have the same length as arrays" if @index.length != @data.length
    end

    # True when at least one row lies between the first threshold+1 rows and
    # the last row, i.e. when the truncated renderings actually hide something.
    def _rows_elided?(threshold)
      length > threshold + 2
    end

    # @return [Integer] number of values
    def length
      @data.length
    end

    # Iterate over the values (not the labels).
    def each(&block)
      @data.each(&block)
    end

    # Look a value up by its index label.
    #
    # @raise [RuntimeError] if the label is not part of the index
    def [](arg)
      pos = @index.index(arg)
      raise "There is no index named #{arg}" if pos.nil?
      @data[pos]
    end

    # Render the series as an HTML table. Rows 0..threshold and the last row
    # are shown; an ellipsis row is inserted only when rows were left out.
    #
    # NOTE(review): labels and values are interpolated without HTML escaping.
    #
    # @param threshold [Integer] position of the last leading row to show
    # @return [String]
    def to_html(threshold=5)
      html = "<table><tr><th></th><th>#{name}</th></tr>"
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != length - 1
        html << "<tr><th>#{label}</th><td>#{@data[pos]}</td></tr>"
        html << "<tr><th>...</th><td>...</td></tr>" if pos == threshold && _rows_elided?(threshold)
      end
      html << "</table>"
    end

    # Print the series as a text table through Formatador, using the same
    # truncation rule as #to_html.
    #
    # NOTE(review): the return value is whatever Formatador.display_table
    # returns, which may not be a String -- confirm against Formatador.
    def to_s(threshold=5)
      rows = []
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != length - 1
        rows.push({"" => label, @name => @data[pos]})
        rows.push({"" => "...", @name => "..."}) if pos == threshold && _rows_elided?(threshold)
      end
      Formatador.display_table(rows)
    end

    # Getter/setter in one: without an argument returns the name, with an
    # argument stores it and returns self so calls can be chained.
    def name(new_name=nil)
      return @name if new_name.nil?

      @name = new_name
      self
    end

    # @return [Array] the values as a plain Array
    def to_a
      @data.to_a
    end

    # @return [Mikon::DArray] the underlying storage (not a copy)
    def to_darr
      @data
    end

    # Multiply every value by a number.
    # @raise [ArgumentError] unless arg is Numeric
    def *(arg)
      raise ArgumentError unless arg.is_a?(Numeric)
      Series.new(self.name, @data * arg, index: self.index)
    end

    # Divide every value by a number.
    # @raise [ArgumentError] unless arg is Numeric
    def /(arg)
      raise ArgumentError unless arg.is_a?(Numeric)
      Series.new(self.name, @data / arg, index: self.index)
    end

    # Take every value modulo a number.
    # @raise [ArgumentError] unless arg is Numeric
    def %(arg)
      raise ArgumentError unless arg.is_a?(Numeric)
      Series.new(self.name, @data % arg, index: self.index)
    end

    # Subtract another series of the same length; the result keeps the
    # receiver's name and index.
    # @raise [ArgumentError] unless arg is a Series of equal length
    def -(arg)
      raise ArgumentError unless arg.is_a?(Mikon::Series) && arg.length == self.length
      # arg.coerce(@data) yields [own storage, arg's storage].
      Series.new(self.name, arg.coerce(@data).inject(:-), index: self.index)
    end

    # Add another series of the same length; the result keeps the
    # receiver's name and index.
    # @raise [ArgumentError] unless arg is a Series of equal length
    def +(arg)
      raise ArgumentError unless arg.is_a?(Mikon::Series) && arg.length == self.length
      Series.new(self.name, arg.coerce(@data).inject(:+), index: self.index)
    end

    # Coercion hook: for a Mikon::DArray returns [other, own storage];
    # for a Numeric returns [self, other].
    # @raise [ArgumentError] for any other type
    def coerce(other)
      if other.is_a?(Mikon::DArray)
        return other, @data
      elsif other.is_a?(Numeric)
        return self, other
      else
        raise ArgumentError
      end
    end

    private :_check_if_valid, :_rows_elided?
  end
end
|
data/lib/mikon/pivot.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mikon
  class DataFrame
    # Experimental Implementation.
    # DO NOT USE THIS METHOD
    #
    # Builds a new DataFrame with one column per distinct value of
    # args[:column] and one row per distinct value of args[:row]; each cell
    # is the first args[:value] entry of the matching rows, or
    # args[:fill_value] when no row matches.
    #
    # @param args [Hash] :column, :row, :value (all Symbols, required) and
    #   :fill_value (defaults to Float::NAN)
    # @raise [ArgumentError] unless :column, :row and :value are Symbols
    def pivot(args={})
      defaults = { column: nil, row: nil, value: nil, fill_value: Float::NAN }
      args = defaults.merge(args)

      required = [:column, :row, :value]
      raise ArgumentError unless required.all? { |key| args[key].is_a?(Symbol) }

      column_labels = self[args[:column]].factors
      row_labels = self[args[:row]].factors

      source = column_labels.each_with_object({}) do |label, table|
        subset = self.select { |row| row[args[:column]] == label }

        table[label] = row_labels.map do |row_label|
          if subset.any? { |row| row[args[:row]] == row_label }
            matched = subset.select { |row| row[args[:row]] == row_label }[args[:value]]
            matched.to_a[0]
          else
            args[:fill_value]
          end
        end
      end

      Mikon::DataFrame.new(source, index: row_labels)
    end
  end
end
|
data/lib/mikon/plot.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'nyaplot'
|
2
|
+
|
3
|
+
module Mikon
  class Series
    # Create a Nyaplot::Plot for this series.
    #
    # @param args [Hash] :type => :histogram (default) or :line; any other
    #   value yields a plot with nothing added
    # @return [Nyaplot::Plot]
    def plot(args={})
      opts = { :type => :histogram }.merge(args)
      figure = Nyaplot::Plot.new

      case opts[:type]
      when :histogram then figure.add(:histogram, @data.to_a)
      when :line then figure.add(:line, @index, @data.to_a)
      end

      figure
    end
  end

  class DataFrame
    # Create a Nyaplot::Plot for this data frame.
    #
    # @param args [Hash]
    #   :type    => :line (default), :box or :scatter
    #   :x, :y   => column labels used by :scatter
    #   :fill_by => column passed to the scatter diagram's fill_by
    #   :color   => name of a Nyaplot::Colors method; Colors.qual when nil
    # @return [Nyaplot::Plot]
    def plot(args={})
      defaults = {
        :type => :line,
        :x => nil,
        :y => nil,
        :fill_by => nil,
        :color => nil
      }
      opts = defaults.merge(args)

      figure = Nyaplot::Plot.new
      figure.x_label("")
      figure.y_label("")

      palette =
        if opts[:color].nil?
          Nyaplot::Colors.qual.to_a
        else
          Nyaplot::Colors.send(opts[:color]).to_a
        end

      case opts[:type]
      when :line
        # One line per column; colours are taken from the end of the palette.
        @data.each.with_index do |darr, pos|
          diagram = figure.add(:line, @index, darr.to_a)
          diagram.color(palette.pop)
          diagram.title(@labels[pos])
        end
        figure.legend(true)

      when :box
        figure.add_with_df(self, :box, *@labels)

      when :scatter
        diagram = figure.add_with_df(self, :scatter, opts[:x], opts[:y])
        diagram.color(palette)
        diagram.fill_by(opts[:fill_by]) unless opts[:fill_by].nil?
        figure.x_label(opts[:x])
        figure.y_label(opts[:y])
      end

      figure
    end
  end
end
|
data/lib/mikon/stats.rb
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
# Implementation of statistical functions. Make DArray compatible with Statsample::Vector.
|
4
|
+
#
|
5
|
+
module Mikon
  # Statistical functions meant to be mixed into an array-like container.
  #
  # The host is expected to keep its values in @data (which must respond to
  # sorted_indices, each_along_dim, mean, std, sum, map, inject, [] ...),
  # to define #length and to include Enumerable (reduce/map are used on self).
  module Stats
    extend Forwardable
    # #push is defined explicitly below, so it is not delegated here.
    def_delegators :@data, :size, :max, :min, :sorted_indices

    # Mean absolute deviation from +m+ (defaults to the mean).
    def average_deviation_population(m=nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m).abs }.fdiv(length)
    end

    # Sample standard deviation divided by the mean.
    def coefficient_of_variation
      standard_deviation_sample / mean
    end

    # With a block: number of values for which the block is truthy.
    # Without: number of occurrences of +x+ (0 when absent).
    def count(x=false)
      if block_given?
        reduce(0) { |memo, val| yield(val) ? memo + 1 : memo }
      else
        frequencies.fetch(x, 0)
      end
    end

    # Iterate over the values; returns an Enumerator without a block.
    def each(&block)
      return to_enum(:each) unless block_given?
      @data.each_along_dim(0, &block)
    end

    # Yield each value together with its position.
    def each_index(&block)
      each.with_index(&block)
    end

    # uniq: distinct values in sorted order.
    def factors
      @data.sorted_indices.each_with_object([]) do |pos, distinct|
        distinct.push(@data[pos]) if distinct.last != @data[pos]
      end
    end

    # Hash of value => number of occurrences, keys in sorted order.
    def frequencies
      @data.sorted_indices.each_with_object({}) do |pos, freq|
        value = @data[pos]
        freq[value] = freq.fetch(value, 0) + 1
      end
    end

    # Missing data is not supported, so this is always false.
    def has_missing_data?
      false
    end

    def is_valid?
      true
    end

    # Excess kurtosis around +m+ (defaults to the mean), using the sample
    # standard deviation.
    def kurtosis(m=nil)
      m ||= mean
      fo = reduce(0) { |a, x| a + ((x - m)**4) }
      fo.quo(length * sd(m)**4) - 3
    end

    # alias_method :label, :labeling
    # labeling(x) would be not implemented

    def mean
      @data.mean.first
    end

    def median
      percentil(50)
    end

    # Median of the absolute deviations from the median.
    def median_absolute_deviation
      m = median
      recode { |val| (val - m).abs }.median
    end

    # Most frequent value (nil when there is no data). On ties the smallest
    # such value wins because frequencies are keyed in sorted order.
    def mode
      top = frequencies.max_by { |_value, occurrences| occurrences }
      top && top.first
    end

    def ==(other)
      @data == other
    end

    def n_valid
      length
    end

    # Percentile following Statsample::Vector#percentil: when
    # length*percent/100 is not a whole number the value at that (truncated)
    # sorted position is returned; otherwise the two neighbouring sorted
    # values are averaged. Positions are clamped so that 0 and 100 return
    # the minimum and maximum. Returns nil when there is no data.
    def percentil(percent)
      return nil if length.zero?

      order = @data.sorted_indices
      pos = (length * percent).quo(100) # exact; integer division would truncate
      if pos.to_i != pos
        @data[order[pos.to_i]]
      else
        lower = (pos - 0.5).to_i
        upper = [pos.to_i, length - 1].min
        (@data[order[lower]] + @data[order[upper]]) / 2.0
      end
    end

    def product
      @data.inject(1) { |memo, val| memo * val }
    end

    # Fraction of the values equal to +val+ (0.0 when absent).
    def proportion(val=1)
      frequencies.fetch(val, 0).fdiv(n_valid)
    end

    def proportion_confidence_interval_t
      raise "NotImplementedError"
    end

    def proportion_confidence_interval_z
      raise "NotImplementedError"
    end

    # Hash of value => fraction of the values equal to it.
    def proportions
      len = n_valid
      frequencies.each_with_object({}) do |(value, occurrences), memo|
        memo[value] = occurrences.fdiv(len)
      end
    end

    # Append +val+.
    # NOTE(review): relies on the host's #expand growing the container so
    # that length-1 addresses the new slot, and on the host's #[]= -- confirm.
    def push(val)
      expand(length + 1)
      self[length - 1] = val
    end

    def range
      max - min
    end

    # Rank of every value (1-based); tied values share the average of the
    # ranks they span.
    def ranked
      seen = 0
      ranks = frequencies.sort.each_with_object({}) do |(value, occurrences), memo|
        memo[value] = ((seen + 1) + (seen + occurrences)).fdiv(2)
        seen += occurrences
      end
      Mikon::DArray.new(map { |val| ranks[val] })
    end

    # New Mikon::DArray holding the block's result for every value.
    def recode(&block)
      Mikon::DArray.new(@data.map(&block))
    end

    # In-place variant of #recode.
    def recode!(&block)
      @data.map!(&block)
    end

    # report_building(b) would not be implemented
    # sample_with_replacement
    # sample_without_replacement

    # set_valid_data

    # Skewness around +m+ (defaults to the mean), using the sample
    # standard deviation.
    def skew(m=nil)
      m ||= mean
      th = reduce(0) { |memo, val| memo + ((val - m)**3) }
      th / (length * sd(m)**3)
    end

    # split_by_separator_freq
    # splitted

    def standard_deviation_population(m=nil)
      m ||= mean
      Math.sqrt(variance_population(m))
    end

    # With +m+ the deviation is computed around it; otherwise the storage's
    # own std is used.
    def standard_deviation_sample(m=nil)
      return @data.std.first if m.nil?

      Math.sqrt(variance_sample(m))
    end

    def standard_error
      standard_deviation_sample / Math.sqrt(length)
    end

    # Sum of squares minus (sum squared / n), as in Statsample::Vector.
    def sum_of_squared_deviation
      reduce(0) { |memo, val| memo + val**2 } - (sum**2).fdiv(n_valid)
    end

    # Sum of squared differences from +m+ (defaults to the mean).
    def sum_of_squares(m=nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m)**2 }
    end

    def sum
      @data.sum.first
    end

    # today_values
    # type=

    # Population variance: sum of squares around +m+ divided by n.
    def variance_population(m=nil)
      m ||= mean
      sum_of_squares(m).fdiv(length)
    end

    # def variance_proportion

    # Sample variance: sum of squares around +m+ divided by n-1.
    def variance_sample(m=nil)
      m ||= mean
      sum_of_squares(m).fdiv(length - 1)
    end

    # def variance_total
    # def vector_centered
    # def vector_labeled
    # def vector_percentil

    def vector_standarized
      raise "NotImplementedError"
    end

    alias_method :n, :size
    alias_method :sd, :standard_deviation_sample
    alias_method :sds, :standard_deviation_sample
    alias_method :sdp, :standard_deviation_population
    alias_method :se, :standard_error
    alias_method :adp, :average_deviation_population
    alias_method :mad, :median_absolute_deviation
    alias_method :ss, :sum_of_squares
    alias_method :flawed?, :has_missing_data?
    alias_method :standarized, :vector_standarized
    alias_method :variance, :variance_sample
  end
end
|