mikon 0.1.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ require 'forwardable'
2
+
3
module Mikon
  # Internal class for indexing.
  #
  # Wraps a Mikon::DArray of labels and forwards positional lookup (#[]) to it.
  class Index
    extend Forwardable
    def_delegators :@data, :[]

    # @return [Object, nil] the optional name given at construction
    attr_reader :name

    # @param source [Array, Mikon::DArray] the labels; an Array is wrapped in
    #   a new Mikon::DArray, a Mikon::DArray is used as-is (not copied)
    # @param options [Hash] supports :name (default nil)
    # @raise [ArgumentError] if source is neither an Array nor a Mikon::DArray
    def initialize(source, options = {})
      options = { name: nil }.merge(options)

      @data =
        case source
        when Array then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else
          raise ArgumentError, "source must be an Array or a Mikon::DArray (got #{source.class})"
        end

      @name = options[:name]
    end

    # Builds a new Index whose labels are ordered by the block's result.
    #
    # @return [Mikon::Index] a sorted index carrying the same name as the
    #   receiver, or an Enumerator when no block is given
    def sort_by(&block)
      return to_enum(:sort_by) unless block_given?

      # Pass the name along so sorting does not silently drop it.
      Mikon::Index.new(@data.sort_by(&block), name: @name)
    end
  end
end
@@ -0,0 +1,139 @@
1
+ require 'forwardable'
2
+
3
module Mikon
  # A named, one-dimensional column of values paired with an index of labels.
  #
  # Values live in a Mikon::DArray (@data). Every instance method of
  # Mikon::Stats, plus #max and #min, is forwarded to that DArray.
  class Series
    include Enumerable
    extend Forwardable
    def_delegators :@data, :max, :min
    def_delegators :@data, *(Mikon::Stats.instance_methods)
    attr_reader :index

    # @param name [Object] label of the series (shown as the column header)
    # @param source [Array, NMatrix, Mikon::DArray] the values; Array and
    #   NMatrix are wrapped in a new Mikon::DArray, a DArray is used as-is
    # @param options [Hash] supports :index (default nil, meaning 0..length-1)
    # @raise [RuntimeError] if source has an unsupported type, or if the index
    #   length differs from the data length
    def initialize(name, source, options = {})
      options = { index: nil }.merge(options)

      @data =
        case source
        when Array, NMatrix then Mikon::DArray.new(source)
        when Mikon::DArray then source
        else raise "Non-acceptable Arguments Error"
        end

      @index = options[:index]
      @name = name

      _check_if_valid
    end

    # Fills in a default positional index and verifies index/data lengths agree.
    def _check_if_valid
      @index = (0...length).to_a if @index.nil?
      raise "index should have the same length as arrays" if @index.length != @data.length
    end

    # @return [Integer] number of values, as reported by the underlying DArray
    def length
      @data.length
    end

    # Iterates over the values by forwarding to the underlying DArray.
    def each(&block)
      @data.each(&block)
    end

    # Looks a value up by its index label (not by position).
    #
    # @raise [RuntimeError] if the label is not present in the index
    def [](arg)
      pos = @index.index(arg)
      raise "There is no index named #{arg}" if pos.nil?
      @data[pos]
    end

    # Renders the series as an HTML table.
    #
    # Rows at positions 0..threshold and the final row are shown; a "..." row
    # marks the gap, and is emitted only when rows were really left out.
    # NOTE(review): names, labels and values are inserted without HTML
    # escaping, exactly as before; escape upstream if they may contain markup.
    #
    # @param threshold [Integer] last position shown before truncation
    # @return [String]
    def to_html(threshold = 5)
      html = "<table><tr><th></th><th>" + name.to_s + "</th></tr>"
      _preview_rows(threshold).each do |label, value|
        html << "<tr><th>" << label.to_s << "</th><td>" << value.to_s << "</td></tr>"
      end
      html << "</table>"
    end

    # Hands a truncated table of the series to Formatador.display_table and
    # returns whatever that call returns.
    #
    # @param threshold [Integer] last position shown before truncation
    def to_s(threshold = 5)
      table = _preview_rows(threshold).map do |label, value|
        { "" => label, @name => value }
      end
      Formatador.display_table(table)
    end

    # Reads the name, or sets it when an argument is given.
    #
    # @param new_name [Object, nil] nil (the default) means "read"
    # @return [Object] the current name when reading, self when writing
    def name(new_name = nil)
      return @name if new_name.nil?

      @name = new_name
      self
    end

    # @return [Array] the values, via the DArray's #to_a
    def to_a
      @data.to_a
    end

    # @return [Mikon::DArray] the underlying DArray itself (not a copy)
    def to_darr
      @data
    end

    # Multiplies the values by a scalar.
    # @raise [ArgumentError] unless arg is Numeric
    def *(arg)
      _require_numeric(arg)
      Series.new(name, @data * arg, index: index)
    end

    # Divides the values by a scalar.
    # @raise [ArgumentError] unless arg is Numeric
    def /(arg)
      _require_numeric(arg)
      Series.new(name, @data / arg, index: index)
    end

    # Applies the DArray's % operator with a scalar.
    # @raise [ArgumentError] unless arg is Numeric
    def %(arg)
      _require_numeric(arg)
      Series.new(name, @data % arg, index: index)
    end

    # Subtracts another series; the result keeps this series' name and index.
    # @raise [ArgumentError] unless arg is a Series of the same length
    def -(arg)
      _require_matching_series(arg)
      Series.new(name, @data - arg.to_darr, index: index)
    end

    # Adds another series; the result keeps this series' name and index.
    # @raise [ArgumentError] unless arg is a Series of the same length
    def +(arg)
      _require_matching_series(arg)
      Series.new(name, @data + arg.to_darr, index: index)
    end

    # Ruby coercion hook.
    #
    # @return [Array] [other, @data] for a DArray, [self, other] for a Numeric
    # @raise [ArgumentError] for any other operand
    def coerce(other)
      case other
      when Mikon::DArray then [other, @data]
      when Numeric then [self, other]
      else raise ArgumentError, "cannot coerce #{other.class} with a Mikon::Series"
      end
    end

    # Collects the [label, value] pairs that a truncated display shows:
    # positions 0..threshold plus the last one, with a ["...", "..."] marker
    # after position threshold when rows between it and the last are hidden.
    def _preview_rows(threshold)
      last = length - 1
      rows = []
      @index.each.with_index do |label, pos|
        next if pos > threshold && pos != last

        rows.push([label, @data[pos]])
        # Hidden rows are threshold+1 .. last-1, so that range must be non-empty.
        rows.push(["...", "..."]) if pos == threshold && last > threshold + 1
      end
      rows
    end

    # Guard shared by the scalar operators.
    def _require_numeric(arg)
      return if arg.is_a?(Numeric)

      raise ArgumentError, "expected a Numeric operand (got #{arg.class})"
    end

    # Guard shared by the series-with-series operators.
    def _require_matching_series(arg)
      return if arg.is_a?(Mikon::Series) && arg.length == length

      raise ArgumentError, "expected a Mikon::Series of length #{length}"
    end

    private :_check_if_valid, :_preview_rows, :_require_numeric, :_require_matching_series
  end
end
@@ -0,0 +1,36 @@
1
module Mikon
  class DataFrame
    # Experimental Implementation.
    # DO NOT USE THIS METHOD
    #
    # Reshapes the frame: each distinct value of the :column field becomes a
    # column, each distinct value of the :row field becomes an index entry, and
    # each cell holds the first :value found for that (row, column) pair.
    #
    # @param args [Hash] :column, :row and :value must be Symbols naming
    #   fields of this frame; :fill_value (default Float::NAN) is used for
    #   pairs with no matching row
    # @return [Mikon::DataFrame] a new frame indexed by the :row factors
    # @raise [ArgumentError] if :column, :row or :value is not a Symbol
    def pivot(args = {})
      args = {
        column: nil,
        row: nil,
        value: nil,
        fill_value: Float::NAN
      }.merge(args)

      unless [:column, :row, :value].all? { |key| args[key].is_a?(Symbol) }
        raise ArgumentError, ":column, :row and :value must all be given as Symbols"
      end

      column_key, row_key, value_key = args.values_at(:column, :row, :value)
      column_labels = self[column_key].factors
      row_labels = self[row_key].factors

      source = column_labels.each_with_object({}) do |label, table|
        # Rows belonging to the output column currently being built.
        subset = self.select { |row| row[column_key] == label }

        table[label] = row_labels.map do |row_label|
          if subset.any? { |row| row[row_key] == row_label }
            matches = subset.select { |row| row[row_key] == row_label }
            # When several rows match, only the first value is kept.
            matches[value_key].to_a[0]
          else
            args[:fill_value]
          end
        end
      end

      Mikon::DataFrame.new(source, index: row_labels)
    end
  end
end
data/lib/mikon/plot.rb ADDED
@@ -0,0 +1,66 @@
1
+ require 'nyaplot'
2
+
3
module Mikon
  class Series
    # Builds a Nyaplot plot of this series.
    #
    # @param args [Hash] :type is :histogram (default) or :line; any other
    #   type adds nothing and the empty plot is returned
    # @return [Nyaplot::Plot]
    def plot(args = {})
      args = { :type => :histogram }.merge(args)

      plot = Nyaplot::Plot.new

      case args[:type]
      when :histogram then plot.add(:histogram, @data.to_a)
      when :line then plot.add(:line, @index, @data.to_a)
      end

      plot
    end
  end

  class DataFrame
    # Builds a Nyaplot plot of this data frame.
    #
    # @param args [Hash]
    #   :type    - :line (default), :box or :scatter; any other type adds
    #              nothing to the plot
    #   :x, :y   - column labels used by :scatter
    #   :fill_by - optional column label passed to the scatter's fill_by
    #   :color   - name of a Nyaplot::Colors palette method; when nil,
    #              Nyaplot::Colors.qual is used
    # @return [Nyaplot::Plot]
    def plot(args = {})
      args = {
        :type => :line,
        :x => nil,
        :y => nil,
        :fill_by => nil,
        :color => nil
      }.merge(args)

      plot = Nyaplot::Plot.new
      plot.x_label("")
      plot.y_label("")

      # public_send keeps a caller-supplied palette name from reaching
      # private methods of Nyaplot::Colors.
      palette = args[:color].nil? ? Nyaplot::Colors.qual : Nyaplot::Colors.public_send(args[:color])
      colors = palette.to_a

      case args[:type]
      when :line
        @data.each.with_index do |darr, i|
          line = plot.add(:line, @index, darr.to_a)
          # Walk the palette from its end, as before, but wrap around so that
          # frames with more columns than colors still get a color, not nil.
          shade = colors.empty? ? nil : colors[-1 - (i % colors.length)]
          line.color(shade)
          line.title(@labels[i])
        end
        plot.legend(true)

      when :box
        plot.add_with_df(self, :box, *@labels)

      when :scatter
        sc = plot.add_with_df(self, :scatter, args[:x], args[:y])
        sc.color(colors)
        sc.fill_by(args[:fill_by]) unless args[:fill_by].nil?
        plot.x_label(args[:x])
        plot.y_label(args[:y])
      end

      plot
    end
  end
end
@@ -0,0 +1,227 @@
1
+ require 'forwardable'
2
+
3
# Implementation of statistical functions. Make DArray compatible with Statsample::Vector.
#
# The methods below read @data and call #length and Enumerable methods
# (#reduce, #map) on self, so the including class is expected to provide
# those; #push additionally calls #expand and #[]= on self.
module Mikon
  module Stats
    extend Forwardable
    # :push is not delegated here because #push is defined explicitly below.
    def_delegators :@data, :size, :max, :min, :sorted_indices

    # Mean absolute deviation from m.
    # @param m [Numeric, nil] centre to measure from; defaults to the mean
    def average_deviation_population(m = nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m).abs } / length
    end

    # Sample standard deviation divided by the mean.
    def coefficient_of_variation
      standard_deviation_sample / mean
    end

    # With a block, counts the elements for which the block is truthy;
    # otherwise counts the occurrences of x.
    def count(x = false)
      if block_given?
        reduce(0) { |memo, val| yield(val) ? memo + 1 : memo }
      else
        frequencies.fetch(x, 0)
      end
    end

    # Yields each element, or returns an Enumerator without a block.
    def each(&block)
      return to_enum(:each) unless block_given?
      @data.each_along_dim(0, &block)
    end

    # Yields each element together with its position (element first).
    def each_index(&block)
      each.with_index(&block)
    end

    # Distinct values in ascending order (uniq).
    # @return [Array]
    def factors
      @data.sorted_indices.each_with_object([]) do |pos, found|
        value = @data[pos]
        # Values arrive sorted, so duplicates are always adjacent.
        found.push(value) if found.last != value
      end
    end

    # @return [Hash] value => number of occurrences
    def frequencies
      @data.sorted_indices.each_with_object({}) do |pos, tally|
        value = @data[pos]
        tally[value] = tally.fetch(value, 0) + 1
      end
    end

    # Always false: missing values are not tracked.
    def has_missing_data?
      false
    end

    # Always true.
    def is_valid?
      true
    end

    # Excess kurtosis, computed with the sample standard deviation.
    # @param m [Numeric, nil] mean to use; defaults to #mean
    def kurtosis(m = nil)
      m ||= mean
      fourth = reduce(0) { |acc, x| acc + ((x - m)**4) }
      fourth.quo(length * sd(m)**4) - 3
    end

    # alias_method :label, :labeling
    # labeling(x) would be not implemented

    def mean
      @data.mean.first
    end

    # The 50th percentile.
    def median
      percentil(50)
    end

    # Median of the absolute deviations from the median.
    def median_absolute_deviation
      m = median
      recode { |val| (val - m).abs }.median
    end

    # The most frequent value (nil when there are no elements).
    def mode
      top = frequencies.max_by { |_value, occurrences| occurrences }
      top && top.first
    end

    def ==(other)
      @data == other
    end

    # Number of valid elements; every element counts as valid here.
    def n_valid
      length
    end

    # Percentile, following Statsample::Vector#percentil: when
    # length * percent / 100 is fractional the element at its integer part is
    # returned; when it is whole, the two elements around that boundary are
    # averaged.
    #
    # @param percent [Numeric] 0..100
    # @return [Object, nil] nil when there are no elements
    def percentil(percent)
      return nil if length.zero?

      order = @data.sorted_indices
      pos = (length * percent).quo(100)
      if pos.to_i != pos
        @data[order[pos.to_i]]
      else
        # Clamp so that 0 and 100 stay inside the data.
        lower = [pos.to_i - 1, 0].max
        upper = [pos.to_i, length - 1].min
        (@data[order[lower]] + @data[order[upper]]) / 2.0
      end
    end

    def product
      @data.inject(1) { |memo, val| memo * val }
    end

    # Fraction of the elements equal to val (0.0 when val does not occur).
    # @return [Float]
    def proportion(val = 1)
      frequencies.fetch(val, 0).fdiv(n_valid)
    end

    def proportion_confidence_interval_t
      raise "NotImplementedError"
    end

    def proportion_confidence_interval_z
      raise "NotImplementedError"
    end

    # @return [Hash] value => fraction of the elements equal to it
    def proportions
      total = n_valid
      frequencies.each_with_object({}) do |(value, occurrences), shares|
        shares[value] = occurrences.fdiv(total)
      end
    end

    # Appends val by growing the receiver by one slot and storing val in it.
    def push(val)
      expand(length + 1)
      self[length - 1] = val
    end

    def range
      max - min
    end

    # Rank of every element, in the original order; tied values share the
    # average of the ranks they span (e.g. 2.5 for a tie across ranks 2 and 3).
    # @return [Mikon::DArray]
    def ranked
      seen = 0
      ranks = frequencies.sort.each_with_object({}) do |(value, occurrences), memo|
        memo[value] = ((seen + 1) + (seen + occurrences)) / 2.0
        seen += occurrences
      end
      Mikon::DArray.new(map { |val| ranks[val] })
    end

    # Returns a new DArray holding the block's result for every element.
    def recode(&block)
      Mikon::DArray.new(@data.map(&block))
    end

    # In-place variant of #recode.
    def recode!(&block)
      @data.map!(&block)
    end

    # report_building(b) would not be implemented
    # sample_with_replacement
    # sample_without_replacement

    # set_valid_data

    # Skewness, computed with the sample standard deviation.
    # @param m [Numeric, nil] mean to use; defaults to #mean
    def skew(m = nil)
      m ||= mean
      third = reduce(0) { |memo, val| memo + ((val - m)**3) }
      third / (length * sd(m)**3)
    end

    # split_by_separator_freq
    # splitted

    def standard_deviation_population(m = nil)
      m ||= mean
      Math.sqrt(variance_population(m))
    end

    # With m given, derived from #variance_sample; otherwise taken from
    # @data.std.
    def standard_deviation_sample(m = nil)
      return @data.std.first if m.nil?
      Math.sqrt(variance_sample(m))
    end

    def standard_error
      standard_deviation_sample / Math.sqrt(length)
    end

    # Sum of squared deviations from the mean, as sum(x^2) - sum(x)^2 / n
    # (the formula used by Statsample::Vector).
    def sum_of_squared_deviation
      reduce(0) { |memo, val| val**2 + memo } - (sum**2).fdiv(n_valid)
    end

    # Sum of squared differences from m.
    # @param m [Numeric, nil] centre; defaults to #mean
    def sum_of_squares(m = nil)
      m ||= mean
      reduce(0) { |memo, val| memo + (val - m)**2 }
    end

    def sum
      @data.sum.first
    end

    # today_values
    # type=

    # Population variance: sum of squares divided by n.
    # @param m [Numeric, nil] mean to use; defaults to #mean
    def variance_population(m = nil)
      m ||= mean
      sum_of_squares(m).fdiv(length)
    end

    # def variance_proportion

    # Sample variance: sum of squares divided by n - 1.
    # @param m [Numeric, nil] mean to use; defaults to #mean
    def variance_sample(m = nil)
      m ||= mean
      sum_of_squares(m).fdiv(length - 1)
    end

    # def variance_total
    # def vector_centered
    # def vector_labeled
    # def vector_percentil

    def vector_standarized
      raise "NotImplementedError"
    end

    alias_method :n, :size
    alias_method :sd, :standard_deviation_sample
    alias_method :sds, :standard_deviation_sample
    alias_method :sdp, :standard_deviation_population
    alias_method :se, :standard_error
    alias_method :adp, :average_deviation_population
    alias_method :mad, :median_absolute_deviation
    alias_method :ss, :sum_of_squares
    alias_method :flawed?, :has_missing_data?
    alias_method :standarized, :vector_standarized
    alias_method :variance, :variance_sample
  end
end