mikon 0.1.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,30 @@
1
+ require 'forwardable'
2
+
3
module Mikon
  # Internal class for indexing.
  #
  # Holds the index labels in a Mikon::DArray together with an optional name.
  # Element access (#[]) is forwarded to the underlying DArray.
  class Index
    extend Forwardable
    def_delegators :@data, :[]

    # @param source [Array, Mikon::DArray] the index labels; an Array is
    #   wrapped in a new Mikon::DArray, a DArray is used as-is
    # @param options [Hash] :name => name of the index (defaults to nil)
    # @raise [ArgumentError] when source is neither an Array nor a Mikon::DArray
    def initialize(source, options={})
      options = { name: nil }.merge(options)

      @data =
        case source
        when Array then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else
          raise ArgumentError,
                "source should be an Array or a Mikon::DArray (given #{source.class})"
        end

      @name = options[:name]
    end

    # Build a new Index whose labels are ordered by the block's result.
    #
    # The name of the receiver is carried over to the sorted copy.
    #
    # @return [Mikon::Index] the sorted index, or an Enumerator when no block
    #   is given
    def sort_by(&block)
      return to_enum(:sort_by) unless block_given?
      Mikon::Index.new(@data.sort_by(&block), name: @name)
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require 'forwardable'
2
+
3
module Mikon
  # A named, one-dimensional sequence of values paired with index labels.
  #
  # Values are kept in a Mikon::DArray. #max, #min and every instance method
  # of Mikon::Stats are forwarded to that DArray.
  class Series
    include Enumerable
    extend Forwardable
    def_delegators :@data, :max, :min
    def_delegators :@data, *(Mikon::Stats.instance_methods)
    # #name is defined explicitly below (it doubles as a chainable setter),
    # so only #index gets a generated reader.
    attr_reader :index

    # @param name [Object] label of the series
    # @param source [Array, NMatrix, Mikon::DArray] the values; Array and
    #   NMatrix are wrapped in a new Mikon::DArray, a DArray is used as-is
    # @param options [Hash] :index => labels for each value; defaults to
    #   0...length
    # @raise [RuntimeError] when source has an unsupported type, or when the
    #   index length differs from the data length
    def initialize(name, source, options={})
      options = { index: nil }.merge(options)

      @data =
        case source
        when Array, NMatrix then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else raise "Non-acceptable Arguments Error"
        end

      @index = options[:index]
      @name = name

      _check_if_valid
    end

    # Number of values in the series.
    def length
      @data.length
    end

    # Iterate over the values (not the index labels).
    def each(&block)
      @data.each(&block)
    end

    # Look up a value by its index label.
    #
    # @raise [RuntimeError] when the label is not part of the index
    def [](arg)
      pos = @index.index(arg)
      raise "There is no index named #{arg}" if pos.nil?
      @data[pos]
    end

    # Render the series as an HTML table.
    #
    # Rows at positions 0..threshold and the last row are shown; a "..." row
    # marks the gap only when rows were actually left out.
    # NOTE(review): labels and values are interpolated without HTML escaping.
    #
    # @param threshold [Integer] last position shown before eliding
    # @return [String]
    def to_html(threshold=5)
      body = _display_rows(threshold).map do |label, value|
        "<tr><th>#{label}</th><td>#{value}</td></tr>"
      end
      "<table><tr><th></th><th>#{name}</th></tr>#{body.join}</table>"
    end

    # Print the series as a text table through Formatador.display_table,
    # using the same row selection as #to_html.
    #
    # Returns whatever Formatador.display_table returns.
    # NOTE(review): that is presumably not a String - confirm before relying
    # on this as a conventional #to_s.
    def to_s(threshold=5)
      rows = _display_rows(threshold).map do |label, value|
        {"" => label, @name => value}
      end
      Formatador.display_table(rows)
    end

    # Read the name, or set it when an argument is given.
    #
    # @return [Object, Mikon::Series] the name when called without argument,
    #   self (for chaining) when a new name is assigned
    def name(new_name=nil)
      return @name if new_name.nil?
      @name = new_name
      self
    end

    # The values as a plain Array.
    def to_a
      @data.to_a
    end

    # The underlying Mikon::DArray (not a copy).
    def to_darr
      @data
    end

    # Multiply every value by a number.
    # @raise [ArgumentError] unless arg is Numeric
    def *(arg)
      _scalar_operation(:*, arg)
    end

    # Divide every value by a number.
    # @raise [ArgumentError] unless arg is Numeric
    def /(arg)
      _scalar_operation(:/, arg)
    end

    # Apply modulo by a number to every value.
    # @raise [ArgumentError] unless arg is Numeric
    def %(arg)
      _scalar_operation(:%, arg)
    end

    # Subtract another series of the same length.
    # @raise [ArgumentError] unless arg is a Mikon::Series of equal length
    def -(arg)
      _series_operation(:-, arg)
    end

    # Add another series of the same length.
    # @raise [ArgumentError] unless arg is a Mikon::Series of equal length
    def +(arg)
      _series_operation(:+, arg)
    end

    # Coercion hook.
    #
    # For a Mikon::DArray it returns [other, own DArray]; for a Numeric it
    # returns [self, other].
    # NOTE(review): the Numeric branch swaps the operands, so
    # `numeric <op> series` is evaluated as `series <op> numeric`; that is
    # only equivalent for commutative operators - confirm this is intended.
    #
    # @raise [ArgumentError] for any other operand type
    def coerce(other)
      case other
      when Mikon::DArray then [other, @data]
      when Numeric then [self, other]
      else
        raise ArgumentError, "cannot coerce #{other.class} with Mikon::Series"
      end
    end

    private

    # Fill in the default index and verify it matches the data length.
    def _check_if_valid
      @index = (0..(length-1)).to_a if @index.nil?
      raise "index should have the same length as arrays" if @index.length != @data.length
    end

    # Rows to display as [label, value] pairs: positions 0..threshold plus the
    # last one, with a ["...", "..."] marker where rows in between were skipped.
    def _display_rows(threshold)
      last = length - 1
      rows = []
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != last
        rows.push([label, @data[pos]])
        # Positions threshold+1 .. last-1 are skipped; mark the gap only if
        # that range is non-empty.
        rows.push(["...", "..."]) if pos == threshold && pos < last - 1
      end
      rows
    end

    # Element-wise operation between the data and a number.
    def _scalar_operation(operator, arg)
      unless arg.is_a?(Numeric)
        raise ArgumentError, "operand should be Numeric (given #{arg.class})"
      end
      Series.new(name, @data.public_send(operator, arg), index: index)
    end

    # Element-wise operation between the data and another series' data.
    def _series_operation(operator, arg)
      unless arg.is_a?(Mikon::Series) && arg.length == length
        raise ArgumentError, "operand should be a Mikon::Series of length #{length}"
      end
      # arg.coerce(@data) yields [own data, arg's data].
      Series.new(name, arg.coerce(@data).inject(operator), index: index)
    end
  end
end
@@ -0,0 +1,36 @@
1
module Mikon
  class DataFrame
    # Experimental Implementation.
    # DO NOT USE THIS METHOD
    #
    # Reshape the frame: the distinct values of the :column field become the
    # new columns, the distinct values of the :row field become the new index,
    # and each cell holds the first :value found for that (row, column) pair.
    #
    # @param args [Hash]
    #   :column     => Symbol, field whose factors become column labels
    #   :row        => Symbol, field whose factors become the index
    #   :value      => Symbol, field providing the cell values
    #   :fill_value => value used when no row matches (default Float::NAN)
    # @return [Mikon::DataFrame]
    # @raise [ArgumentError] unless :column, :row and :value are all Symbols
    def pivot(args={})
      args = {
        column: nil,
        row: nil,
        value: nil,
        fill_value: Float::NAN
      }.merge(args)

      [:column, :row, :value].each do |key|
        next if args[key].is_a?(Symbol)
        raise ArgumentError, "#{key}: should be a Symbol (given #{args[key].inspect})"
      end

      column_key, row_key, value_key = args.values_at(:column, :row, :value)
      column_labels = self[column_key].factors
      row_labels = self[row_key].factors

      source = column_labels.each_with_object({}) do |label, memo|
        # Rows belonging to the column currently being built.
        subset = self.select { |row| row[column_key] == label }

        memo[label] = row_labels.map do |row_label|
          if subset.any? { |row| row[row_key] == row_label }
            # Only the first matching value is kept.
            subset.select { |row| row[row_key] == row_label }[value_key].to_a[0]
          else
            args[:fill_value]
          end
        end
      end

      Mikon::DataFrame.new(source, index: row_labels)
    end
  end
end
data/lib/mikon/plot.rb ADDED
@@ -0,0 +1,66 @@
1
+ require 'nyaplot'
2
+
3
module Mikon
  class Series
    # Build a Nyaplot::Plot for this series.
    #
    # @param args [Hash] :type => :histogram (default) plots the values;
    #   :line plots the values against the index. Any other type leaves the
    #   plot without a diagram.
    # @return [Nyaplot::Plot]
    def plot(args={})
      args = { :type => :histogram }.merge(args)

      chart = Nyaplot::Plot.new

      case args[:type]
      when :histogram then chart.add(:histogram, @data.to_a)
      when :line then chart.add(:line, @index, @data.to_a)
      end

      chart
    end
  end

  class DataFrame
    # Build a Nyaplot::Plot for this data frame.
    #
    # @param args [Hash]
    #   :type    => :line (default), :box or :scatter
    #   :x, :y   => column labels used by :scatter
    #   :fill_by => column label passed to the scatter's fill_by (optional)
    #   :color   => name of a public Nyaplot::Colors method providing the
    #               palette; Nyaplot::Colors.qual is used when nil
    # @return [Nyaplot::Plot]
    def plot(args={})
      args = {
        :type => :line,
        :x => nil,
        :y => nil,
        :fill_by => nil,
        :color => nil
      }.merge(args)

      chart = Nyaplot::Plot.new
      chart.x_label("")
      chart.y_label("")

      # public_send: the palette name comes from the caller, so it must not be
      # able to reach private methods.
      colors =
        if args[:color].nil?
          Nyaplot::Colors.qual.to_a
        else
          Nyaplot::Colors.public_send(args[:color]).to_a
        end

      case args[:type]
      when :line
        # Colours are handed out from the end of the palette and wrap around,
        # so frames with more columns than colours still get a colour per line.
        palette = colors.reverse
        @data.each.with_index do |darr, i|
          line = chart.add(:line, @index, darr.to_a)
          line.color(palette.empty? ? nil : palette[i % palette.length])
          line.title(@labels[i])
        end
        chart.legend(true)

      when :box
        chart.add_with_df(self, :box, *@labels)

      when :scatter
        sc = chart.add_with_df(self, :scatter, args[:x], args[:y])
        sc.color(colors)
        sc.fill_by(args[:fill_by]) unless args[:fill_by].nil?
        chart.x_label(args[:x])
        chart.y_label(args[:y])
      end

      chart
    end
  end
end
@@ -0,0 +1,227 @@
1
+ require 'forwardable'
2
+
3
# Implementation of statistical functions. Makes DArray compatible with Statsample::Vector.
#
# The including class is expected to provide @data (responding to
# sorted_indices, [], map, mean, std, sum, ...), #length, and the Enumerable
# methods built on top of #each.
module Mikon
  module Stats
    extend Forwardable
    # :push is not delegated here because this module defines its own #push.
    def_delegators :@data, :size, :max, :min, :sorted_indices

    # Mean absolute deviation from m (defaults to the mean).
    def average_deviation_population(m=nil)
      m ||= self.mean
      self.reduce(0){|memo, val| memo + (val - m).abs} / self.length.to_f
    end

    # Sample standard deviation divided by the mean.
    def coefficient_of_variation
      self.standard_deviation_sample/self.mean
    end

    # With a block: number of values for which the block is truthy.
    # Without a block: number of occurrences of x (0 when absent).
    def count(x=false)
      if block_given?
        self.reduce(0){|memo, val| (yield val) ? memo + 1 : memo}
      else
        self.frequencies[x] || 0
      end
    end

    # Iterate via @data.each_along_dim(0); returns an Enumerator without a block.
    def each(&block)
      return self.to_enum(:each) unless block_given?
      @data.each_along_dim(0, &block)
    end

    # Iterate over the values together with their position (value, position).
    def each_index(&block)
      self.each.with_index(&block)
    end

    # Distinct values, in sorted order (uniq).
    def factors
      @data.sorted_indices.each_with_object([]) do |i, uniq|
        value = @data[i]
        uniq.push(value) if uniq.last != value
      end
    end

    # Hash of value => number of occurrences, keys inserted in sorted order.
    def frequencies
      @data.sorted_indices.each_with_object({}) do |i, freq|
        value = @data[i]
        freq[value] = (freq[value] || 0) + 1
      end
    end

    # Always false.
    def has_missing_data?
      false
    end

    # Always true.
    def is_valid?
      true
    end

    # Excess kurtosis around m (defaults to the mean), using the sample
    # standard deviation.
    def kurtosis(m=nil)
      m ||= self.mean
      fo=self.reduce(0){|a, x| a+((x-m)**4)}
      fo.quo(self.length*sd(m)**4)-3
    end

    # alias_method :label, :labeling
    # labeling(x) will not be implemented

    # First element of @data.mean.
    def mean
      @data.mean.first
    end

    # The 50th percentile.
    def median
      self.percentil(50)
    end

    # Median of the absolute deviations from the median.
    def median_absolute_deviation
      m = self.median
      self.recode{|val| (val-m).abs}.median
    end

    # The most frequent value (nil when there are no values). On ties the
    # smallest of the tied values is returned.
    def mode
      most_frequent = self.frequencies.max_by{|_value, times| times}
      most_frequent && most_frequent.first
    end

    # Compare the underlying data with other.
    def ==(other)
      @data==other
    end

    # Number of values; equal to #length.
    def n_valid
      self.length
    end

    # Value at the given percentile, following Statsample::Vector#percentil:
    # when length*percent/100 is a whole number the two neighbouring sorted
    # values are averaged, otherwise the sorted value at the truncated
    # position is returned. Positions are clamped to the valid range.
    #
    # @param percent [Numeric] percentile in 0..100
    # @return [Object, nil] nil when there are no values
    def percentil(percent)
      return nil if self.length.zero?

      index = @data.sorted_indices
      last = self.length - 1
      pos = (self.length * percent).quo(100)

      if pos.to_i == pos
        lower = [pos.to_i - 1, 0].max
        upper = [pos.to_i, last].min
        (@data[index[lower]] + @data[index[upper]]) / 2.0
      else
        @data[index[[pos.to_i, last].min]]
      end
    end

    # Product of all values.
    def product
      @data.inject(1){|memo, val| memo*val}
    end

    # Share of values equal to val (0 when val does not occur).
    def proportion(val=1)
      self.count(val).quo(self.n_valid)
    end

    def proportion_confidence_interval_t
      raise "NotImplementedError"
    end

    def proportion_confidence_interval_z
      raise "NotImplementedError"
    end

    # Hash of value => share of occurrences.
    def proportions
      len = self.n_valid
      self.frequencies.each_with_object({}) do |(value, times), memo|
        memo[value] = times.quo(len)
      end
    end

    # Append val: grows the receiver by one slot via #expand and stores val
    # in the last position.
    def push(val)
      self.expand(self.length+1)
      self[self.length-1] = val
    end

    # Difference between the largest and the smallest value.
    def range
      max - min
    end

    # Rank of every value, ties receiving the average of the ranks they span.
    # NOTE(review): relies on #map yielding scalar values usable as Hash keys -
    # confirm against what #each yields for the real backing store.
    #
    # @return [Mikon::DArray]
    def ranked
      sum = 0
      r = self.frequencies.sort.reduce({}) do |memo, val|
        # Ranks sum+1 .. sum+count are shared; use their (fractional) mean.
        memo[val[0]] = ((sum+1) + (sum+val[1])) / 2.0
        sum += val[1]
        memo
      end
      Mikon::DArray.new(self.map{|val| r[val]})
    end

    # New DArray with the block applied to every value.
    def recode(&block)
      Mikon::DArray.new(@data.map(&block))
    end

    # Apply the block to every value in place.
    def recode!(&block)
      @data.map!(&block)
    end

    # report_building(b) will not be implemented
    # sample_with_replacement
    # sample_without_replacement

    # set_valid_data

    # Skewness around m (defaults to the mean), using the sample standard
    # deviation.
    def skew(m=nil)
      m ||= self.mean
      th = self.reduce(0){|memo, val| memo + ((val - m)**3)}
      th/((self.length)*self.sd(m)**3)
    end

    # split_by_separator_freq
    # splitted

    # Square root of the population variance around m (defaults to the mean).
    def standard_deviation_population(m=nil)
      m ||= self.mean
      Math.sqrt(self.variance_population(m))
    end

    # Sample standard deviation. With an explicit m it is derived from
    # #variance_sample; otherwise the first element of @data.std is returned.
    def standard_deviation_sample(m=nil)
      if !m.nil?
        Math.sqrt(variance_sample(m))
      else
        @data.std.first
      end
    end

    # Sample standard deviation divided by sqrt(length).
    def standard_error
      self.standard_deviation_sample/(Math.sqrt(self.length))
    end

    # Sum of squared values minus sum**2 / length, as in Statsample::Vector.
    def sum_of_squared_deviation
      self.reduce(0){|memo, val| val**2 + memo} - (self.sum**2) / self.length.to_f
    end

    # Sum of squared differences from m (defaults to the mean).
    def sum_of_squares(m=nil)
      m ||= self.mean
      self.reduce(0){|memo, val| memo + (val-m)**2}
    end

    # First element of @data.sum.
    def sum
      @data.sum.first
    end

    # today_values
    # type=

    # Sum of squares around m (defaults to the mean) divided by length.
    def variance_population(m=nil)
      m ||= self.mean
      self.sum_of_squares(m) / self.length.to_f
    end

    # def variance_proportion

    # Sum of squares around m (defaults to the mean) divided by length - 1.
    def variance_sample(m=nil)
      m ||= self.mean
      self.sum_of_squares(m) / (self.length-1).to_f
    end

    # def variance_total
    # def vector_centered
    # def vector_labeled
    # def vector_percentil

    def vector_standarized
      raise "NotImplementedError"
    end

    alias_method :n, :size
    alias_method :sd, :standard_deviation_sample
    alias_method :sds, :standard_deviation_sample
    alias_method :sdp, :standard_deviation_population
    alias_method :se, :standard_error
    alias_method :adp, :average_deviation_population
    alias_method :mad, :median_absolute_deviation
    alias_method :ss, :sum_of_squares
    alias_method :flawed?, :has_missing_data?
    alias_method :standarized, :vector_standarized
    alias_method :variance, :variance_sample
  end
end