mikon 0.1.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +160 -0
- data/Rakefile +1 -0
- data/example/Mikon_Manipuration.ipynb +582 -0
- data/example/Mikon_stats.ipynb +352 -0
- data/example/Plotting.ipynb +503 -0
- data/lib/mikon.rb +9 -0
- data/lib/mikon/core/array.rb +139 -0
- data/lib/mikon/core/dataframe.rb +400 -0
- data/lib/mikon/core/index.rb +30 -0
- data/lib/mikon/core/series.rb +139 -0
- data/lib/mikon/pivot.rb +36 -0
- data/lib/mikon/plot.rb +66 -0
- data/lib/mikon/stats.rb +227 -0
- data/lib/mikon/version.rb +3 -0
- data/mikon.gemspec +26 -0
- data/spec/core/array_spec.rb +0 -0
- data/spec/core/dataframe_spec.rb +200 -0
- data/spec/core/series_spec.rb +0 -0
- data/spec/data/no_header.csv +2 -0
- data/spec/data/test.csv +3 -0
- data/spec/data/test.tsv +3 -0
- data/spec/spec_helper.rb +2 -0
- metadata +147 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Mikon
  # Internal class for indexing.
  #
  # Holds the index labels in a Mikon::DArray and forwards element access
  # (+[]+) to that array. An optional name is stored alongside the labels.
  class Index
    extend Forwardable
    def_delegators :@data, :[]

    # @param source [Array, Mikon::DArray] index labels. An Array is wrapped
    #   in a new Mikon::DArray; a Mikon::DArray is stored as-is (not copied).
    # @param options [Hash] supported key: +:name+ (defaults to nil).
    # @raise [ArgumentError] when +source+ is neither an Array nor a
    #   Mikon::DArray.
    def initialize(source, options={})
      options = { name: nil }.merge(options)

      if source.is_a?(Array)
        @data = Mikon::DArray.new(source)
      elsif source.is_a?(Mikon::DArray)
        @data = source
      else
        raise ArgumentError,
              "source should be an Array or a Mikon::DArray (given #{source.class})"
      end

      @name = options[:name]
    end

    # Builds a new Index whose labels are ordered by the block's result.
    #
    # The name of the receiver is carried over to the new Index (the sorted
    # index still describes the same axis).
    #
    # @yield [label] each index label; the block result is the sort key.
    # @return [Mikon::Index] the sorted index, or an Enumerator bound to
    #   +sort_by+ when no block is given.
    def sort_by(&block)
      return to_enum(:sort_by) unless block_given?
      Mikon::Index.new(@data.sort_by(&block), name: @name)
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
module Mikon
  # A named column of values paired with an index of row labels.
  #
  # Values live in a Mikon::DArray (+@data+); the statistical methods of
  # Mikon::Stats, plus +max+ and +min+, are forwarded to it.
  class Series
    include Enumerable
    extend Forwardable
    def_delegators :@data, :max, :min
    def_delegators :@data, *(Mikon::Stats.instance_methods)
    # +name+ is not listed here: it is defined below as a combined
    # getter/setter, which would otherwise redefine the generated reader.
    attr_reader :index

    # @param name [Object] label of the series.
    # @param source [Array, NMatrix, Mikon::DArray] the values. Arrays and
    #   NMatrix objects are wrapped in a new Mikon::DArray; a Mikon::DArray is
    #   stored as-is.
    # @param options [Hash] supported key: +:index+ (row labels; defaults to
    #   0...length).
    # @raise [RuntimeError] when +source+ has an unsupported type or the
    #   index length differs from the data length.
    def initialize(name, source, options={})
      options = { index: nil }.merge(options)

      if source.is_a?(Array) || source.is_a?(NMatrix)
        @data = Mikon::DArray.new(source)
      elsif source.is_a?(Mikon::DArray)
        @data = source
      else
        raise "Non-acceptable Arguments Error"
      end

      @index = options[:index]
      @name = name

      _check_if_valid
    end

    # Fills in the default index (0, 1, ...) and verifies that index and
    # data have the same length.
    def _check_if_valid
      @index = (0..(length - 1)).to_a if @index.nil?
      raise "index should have the same length as arrays" if @index.length != @data.length
    end

    # @return [Integer] number of values, as reported by the underlying data.
    def length
      @data.length
    end

    # Iterates over the values (not the labels).
    def each(&block)
      @data.each(&block)
    end

    # Looks a value up by its index label.
    #
    # @param arg [Object] a label contained in the index.
    # @return [Object] the value stored at that label's position.
    # @raise [RuntimeError] when the label is not part of the index.
    def [](arg)
      pos = @index.index(arg)
      raise "There is no index named #{arg}" if pos.nil?
      @data[pos]
    end

    # Renders the series as an HTML table. Rows 0..threshold and the last row
    # are shown; an ellipsis row is inserted only when rows were skipped.
    #
    # NOTE(review): name, labels and values are interpolated without HTML
    # escaping -- confirm the output is never fed untrusted data.
    #
    # @param threshold [Integer] position of the last leading row to show.
    # @return [String] the HTML markup.
    def to_html(threshold=5)
      html = "<table><tr><th></th><th>" + name.to_s + "</th></tr>"
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != length - 1
        html << "<tr><th>" << label.to_s << "</th><td>" << @data[pos].to_s << "</td></tr>"
        html << "<tr><th>...</th><td>...</td></tr>" if _ellipsis_after?(pos, threshold)
      end
      html << "</table>"
    end

    # Displays the series as a text table through Formatador, using the same
    # row selection as +to_html+.
    #
    # @param threshold [Integer] position of the last leading row to show.
    # @return whatever Formatador.display_table returns.
    def to_s(threshold=5)
      rows = []
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != length - 1
        rows.push({"" => label, @name => @data[pos]})
        rows.push({"" => "...", @name => "..."}) if _ellipsis_after?(pos, threshold)
      end
      Formatador.display_table(rows)
    end

    # Combined getter/setter for the series name.
    #
    # @param new_name [Object, nil] when nil the current name is returned;
    #   otherwise the name is replaced.
    # @return [Object, Mikon::Series] the name (getter) or self (setter).
    def name(new_name=nil)
      return @name if new_name.nil?
      @name = new_name
      self
    end

    # @return [Array] the values as a plain Array.
    def to_a
      @data.to_a
    end

    # @return [Mikon::DArray] the underlying data object itself (not a copy).
    def to_darr
      @data
    end

    # Scalar multiplication; returns a new Series with the same name and index.
    # @raise [ArgumentError] unless +arg+ is Numeric.
    def *(arg)
      _scalar_op(:*, arg)
    end

    # Scalar division; returns a new Series with the same name and index.
    # @raise [ArgumentError] unless +arg+ is Numeric.
    def /(arg)
      _scalar_op(:/, arg)
    end

    # Scalar modulo; returns a new Series with the same name and index.
    # @raise [ArgumentError] unless +arg+ is Numeric.
    def %(arg)
      _scalar_op(:%, arg)
    end

    # Subtracts another Series of the same length; keeps the receiver's name
    # and index.
    # @raise [ArgumentError] unless +arg+ is a Series of equal length.
    def -(arg)
      _series_op(:-, arg)
    end

    # Adds another Series of the same length; keeps the receiver's name and
    # index.
    # @raise [ArgumentError] unless +arg+ is a Series of equal length.
    def +(arg)
      _series_op(:+, arg)
    end

    # Coercion hook.
    #
    # @param other [Mikon::DArray, Numeric]
    # @return [Array] +[other, @data]+ for a DArray, +[self, other]+ for a
    #   Numeric. The Numeric form puts the Series first, so
    #   +numeric <op> series+ is evaluated as +series <op> numeric+.
    # @raise [ArgumentError] for any other type.
    def coerce(other)
      if other.is_a?(Mikon::DArray)
        return other, @data
      elsif other.is_a?(Numeric)
        return self, other
      else
        raise ArgumentError, "cannot coerce #{other.class} with Mikon::Series"
      end
    end

    # Applies a binary operator between the data and a scalar.
    def _scalar_op(operator, arg)
      raise ArgumentError, "expected a Numeric (given #{arg.class})" unless arg.is_a?(Numeric)
      Series.new(name, @data.public_send(operator, arg), index: index)
    end

    # Applies a binary operator between the data of two equally long Series.
    def _series_op(operator, arg)
      unless arg.is_a?(Mikon::Series) && arg.length == length
        raise ArgumentError, "expected a Mikon::Series of length #{length}"
      end
      Series.new(name, @data.public_send(operator, arg.to_darr), index: index)
    end

    # True when the row at +pos+ is the last leading row and at least one row
    # is skipped between it and the final row.
    def _ellipsis_after?(pos, threshold)
      pos == threshold && pos + 1 < length - 1
    end

    private :_check_if_valid, :_scalar_op, :_series_op, :_ellipsis_after?
  end
end
|
data/lib/mikon/pivot.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
module Mikon
  class DataFrame
    # Experimental Implementation.
    # DO NOT USE THIS METHOD
    #
    # Reshapes the frame: each distinct value of the +:column+ field becomes a
    # column of the result, each distinct value of the +:row+ field becomes an
    # index entry, and every cell holds the first +:value+ found for that
    # (row, column) pair, or +:fill_value+ when there is none.
    #
    # @param args [Hash] +:column+, +:row+, +:value+ (Symbols, all required)
    #   and +:fill_value+ (defaults to Float::NAN).
    # @return [Mikon::DataFrame] the pivoted frame, indexed by the row factors.
    # @raise [ArgumentError] unless +:column+, +:row+ and +:value+ are Symbols.
    def pivot(args={})
      args = {
        column: nil,
        row: nil,
        value: nil,
        fill_value: Float::NAN
      }.merge(args)

      unless [:column, :row, :value].all? { |key| args[key].is_a?(Symbol) }
        raise ArgumentError, ":column, :row and :value should all be Symbols"
      end

      column_key = args[:column]
      row_key = args[:row]
      value_key = args[:value]

      labels = self[column_key].factors
      index = self[row_key].factors

      source = labels.each_with_object({}) do |label, memo|
        # Rows belonging to the column currently being built.
        subset = select { |row| row[column_key] == label }

        memo[label] = index.map do |i|
          if subset.any? { |row| row[row_key] == i }
            # Only the first matching value is kept when several rows match.
            subset.select { |row| row[row_key] == i }[value_key].to_a[0]
          else
            args[:fill_value]
          end
        end
      end

      Mikon::DataFrame.new(source, index: index)
    end
  end
end
|
data/lib/mikon/plot.rb
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'nyaplot'
|
2
|
+
|
3
|
+
module Mikon
  class Series
    # Builds a Nyaplot plot of the series.
    #
    # @param args [Hash] supported key: +:type+, either +:histogram+
    #   (default) or +:line+. Any other type yields an empty plot.
    # @return [Nyaplot::Plot]
    def plot(args={})
      args = { :type => :histogram }.merge(args)

      plot = Nyaplot::Plot.new

      case args[:type]
      when :histogram
        plot.add(:histogram, @data.to_a)
      when :line
        plot.add(:line, @index, @data.to_a)
      end

      plot
    end
  end

  class DataFrame
    # Builds a Nyaplot plot of the frame.
    #
    # @param args [Hash] supported keys:
    #   +:type+    -- +:line+ (default), +:box+ or +:scatter+; any other type
    #                 yields a plot with no diagram;
    #   +:x+, +:y+ -- column labels used by the scatter plot;
    #   +:fill_by+ -- column label passed to the scatter diagram's +fill_by+;
    #   +:color+   -- name of a public Nyaplot::Colors method providing the
    #                 palette (defaults to +qual+).
    # @return [Nyaplot::Plot]
    # @raise [NoMethodError] when +:color+ does not name a public method of
    #   Nyaplot::Colors.
    def plot(args={})
      args = {
        :type => :line,
        :x => nil,
        :y => nil,
        :fill_by => nil,
        :color => nil
      }.merge(args)

      plot = Nyaplot::Plot.new
      plot.x_label("")
      plot.y_label("")

      # public_send (not send): the palette name comes from the caller and
      # must not be able to reach private methods such as Kernel#exit.
      palette =
        if args[:color].nil?
          Nyaplot::Colors.qual
        else
          Nyaplot::Colors.public_send(args[:color])
        end
      colors = palette.to_a

      case args[:type]
      when :line
        @data.each.with_index do |darr, i|
          line = plot.add(:line, @index, darr.to_a)
          # Colors are consumed from the end of the palette; once it is
          # exhausted pop returns nil.
          line.color(colors.pop)
          line.title(@labels[i])
        end
        plot.legend(true)

      when :box
        plot.add_with_df(self, :box, *@labels)

      when :scatter
        sc = plot.add_with_df(self, :scatter, args[:x], args[:y])
        sc.color(colors)
        sc.fill_by(args[:fill_by]) unless args[:fill_by].nil?
        plot.x_label(args[:x])
        plot.y_label(args[:y])
      end

      plot
    end
  end
end
|
data/lib/mikon/stats.rb
ADDED
@@ -0,0 +1,227 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
|
3
|
+
# Implementation of statistical functions. Make DArray compatible with Statsample::Vector.
|
4
|
+
#
|
5
|
+
module Mikon
  # Statistical methods modelled on Statsample::Vector.
  #
  # The including class is expected to provide +@data+, +length+ and the
  # Enumerable methods (+reduce+, +map+); +push+ additionally relies on
  # +expand+ and +[]=+. +@data+ must respond to +sorted_indices+,
  # +each_along_dim+, +mean+, +std+, +sum+, +[]+, +map+ and +inject+
  # (presumably an NMatrix-like object -- TODO confirm against DArray).
  module Stats
    extend Forwardable
    # :push is intentionally not delegated: it is defined explicitly below.
    def_delegators :@data, :size, :max, :min, :sorted_indices

    # Mean absolute deviation from +m+.
    # @param m [Numeric, nil] center; defaults to the mean.
    def average_deviation_population(m=nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m).abs } / length
    end

    # Sample standard deviation divided by the mean.
    def coefficient_of_variation
      standard_deviation_sample / mean
    end

    # With a block: number of values for which the block is truthy.
    # Without a block: number of occurrences of +x+ (0 when absent).
    def count(x=false)
      if block_given?
        reduce(0) { |memo, val| (yield val) ? memo + 1 : memo }
      else
        frequencies[x] || 0
      end
    end

    # Iterates over the values via +@data.each_along_dim(0)+; returns an
    # Enumerator when no block is given.
    def each(&block)
      return to_enum(:each) unless block_given?
      @data.each_along_dim(0, &block)
    end

    # Yields each value together with its position (value first).
    def each_index(&block)
      each.with_index(&block)
    end

    # uniq
    # @return [Array] the distinct values in ascending order.
    def factors
      @data.sorted_indices.reduce([]) do |memo, i|
        # Values arrive sorted, so duplicates are always adjacent.
        memo.push(@data[i]) if memo.last != @data[i]
        memo
      end
    end

    # @return [Hash] value => number of occurrences, keys in ascending order.
    def frequencies
      @data.sorted_indices.each_with_object({}) do |i, freq|
        key = @data[i]
        freq[key] = freq.fetch(key, 0) + 1
      end
    end

    # Always false: missing data is not modelled here.
    def has_missing_data?
      false
    end

    # Always true.
    def is_valid?
      true
    end

    # Excess kurtosis around +m+ (defaults to the mean), using the sample
    # standard deviation.
    def kurtosis(m=nil)
      m ||= mean
      fourth = reduce(0) { |memo, val| memo + ((val - m)**4) }
      fourth.quo(length * sd(m)**4) - 3
    end

    # alias_method :label, :labeling
    # labeling(x) would be not implemented

    # First element of +@data.mean+.
    def mean
      @data.mean.first
    end

    # 50th percentile.
    def median
      percentil(50)
    end

    # Median of the absolute deviations from the median.
    def median_absolute_deviation
      m = median
      recode { |val| (val - m).abs }.median
    end

    # @return the most frequent value (the smallest one on ties), or nil when
    #   there are no values.
    def mode
      top = frequencies.max_by { |_value, occurrences| occurrences }
      top && top.first
    end

    # Compares the underlying data with +other+.
    def ==(other)
      @data == other
    end

    # Number of valid values; identical to +length+ because missing data is
    # not modelled.
    def n_valid
      length
    end

    # Percentile, following Statsample::Vector#percentil: with
    # +pos = length * percent / 100+, a fractional +pos+ selects the sorted
    # value at +pos.to_i+, an integral +pos+ averages the two sorted values
    # around it.
    #
    # @param percent [Numeric] percentage in 0..100.
    # @return the percentile value (a Float when two values are averaged), or
    #   nil when there are no values.
    def percentil(percent)
      return nil if length.zero?

      order = @data.sorted_indices
      pos = (length * percent).quo(100)

      if pos.to_i != pos
        @data[order[pos.to_i]]
      else
        # Clamp so that 0 and 100 percent resolve to the minimum and maximum.
        lower = [pos.to_i - 1, 0].max
        upper = [pos.to_i, length - 1].min
        (@data[order[lower]] + @data[order[upper]]) / 2.0
      end
    end

    # Product of all values (1 when there are none).
    def product
      @data.inject(1) { |memo, val| memo * val }
    end

    # @return [Float] share of values equal to +val+ (0.0 when absent).
    def proportion(val=1)
      (frequencies[val] || 0).fdiv(n_valid)
    end

    # Not implemented; raises a RuntimeError whose message is
    # "NotImplementedError".
    def proportion_confidence_interval_t
      raise "NotImplementedError"
    end

    # Not implemented; raises a RuntimeError whose message is
    # "NotImplementedError".
    def proportion_confidence_interval_z
      raise "NotImplementedError"
    end

    # @return [Hash] value => share of occurrences (Float).
    def proportions
      total = n_valid
      frequencies.each_with_object({}) do |(value, occurrences), memo|
        memo[value] = occurrences.fdiv(total)
      end
    end

    # Appends +val+: grows the container by one slot through +expand+ and
    # stores the value in the new last position.
    def push(val)
      expand(length + 1)
      self[length - 1] = val
    end

    # Difference between the largest and the smallest value.
    def range
      max - min
    end

    # Rank of every value, in the original order. Tied values share the
    # average of the ranks they span (e.g. 2.5 for ranks 2 and 3).
    # @return [Mikon::DArray]
    def ranked
      seen = 0
      ranks = frequencies.sort.each_with_object({}) do |(value, occurrences), memo|
        memo[value] = ((seen + 1) + (seen + occurrences)) / 2.0
        seen += occurrences
      end
      Mikon::DArray.new(map { |val| ranks[val] })
    end

    # @return [Mikon::DArray] a new array holding the block's result for each
    #   value.
    def recode(&block)
      Mikon::DArray.new(@data.map(&block))
    end

    # Replaces every value by the block's result, in place.
    def recode!(&block)
      @data.map!(&block)
    end

    # report_building(b) would not be implemented
    # sample_with_replacement
    # sample_without_replacement

    # set_valid_data

    # Skewness around +m+ (defaults to the mean), using the sample standard
    # deviation.
    def skew(m=nil)
      m ||= mean
      third = reduce(0) { |memo, val| memo + ((val - m)**3) }
      third / (length * sd(m)**3)
    end

    # split_by_separator_freq
    # splitted

    # Square root of the population variance around +m+.
    def standard_deviation_population(m=nil)
      m ||= mean
      Math.sqrt(variance_population(m))
    end

    # Sample standard deviation: computed from +variance_sample(m)+ when a
    # center is given, otherwise taken from +@data.std+.
    def standard_deviation_sample(m=nil)
      if m.nil?
        @data.std.first
      else
        Math.sqrt(variance_sample(m))
      end
    end

    # Sample standard deviation divided by the square root of the length.
    def standard_error
      standard_deviation_sample / Math.sqrt(length)
    end

    # Sum of squared deviations from the mean, computed as
    # sum(x^2) - sum(x)^2 / n (same formula as Statsample::Vector).
    def sum_of_squared_deviation
      reduce(0) { |memo, val| memo + val**2 } - (sum**2) / n_valid.to_f
    end

    # Sum of squared differences from +m+ (defaults to the mean).
    def sum_of_squares(m=nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m)**2 }
    end

    # First element of +@data.sum+.
    def sum
      @data.sum.first
    end

    # today_values
    # type=

    # Sum of squares around +m+ (defaults to the mean) divided by the length.
    def variance_population(m=nil)
      m ||= mean
      sum_of_squares(m) / length
    end

    # def variance_proportion

    # Sum of squares around +m+ (defaults to the mean) divided by length - 1.
    def variance_sample(m=nil)
      m ||= mean
      sum_of_squares(m) / (length - 1)
    end

    # def variance_total
    # def vector_centered
    # def vector_labeled
    # def vector_percentil

    # Not implemented; raises a RuntimeError whose message is
    # "NotImplementedError".
    def vector_standarized
      raise "NotImplementedError"
    end

    alias_method :n, :size
    alias_method :sd, :standard_deviation_sample
    alias_method :sds, :standard_deviation_sample
    alias_method :sdp, :standard_deviation_population
    alias_method :se, :standard_error
    alias_method :adp, :average_deviation_population
    alias_method :mad, :median_absolute_deviation
    alias_method :ss, :sum_of_squares
    alias_method :flawed?, :has_missing_data?
    alias_method :standarized, :vector_standarized
    alias_method :variance, :variance_sample
  end
end
|