histogram 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +3 -0
- data/Gemfile +4 -0
- data/LICENSE +15 -0
- data/README.md +67 -0
- data/Rakefile +19 -0
- data/bin/histogram +379 -0
- data/histogram.gemspec +30 -0
- data/lib/histogram.rb +348 -0
- data/lib/histogram/array.rb +5 -0
- data/lib/histogram/narray.rb +5 -0
- data/lib/histogram/plot.rb +7 -0
- data/lib/histogram/version.rb +3 -0
- data/spec/histogram_spec.rb +162 -0
- data/spec/spec_helper.rb +29 -0
- metadata +131 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 4fd1f4a522ac61d4364bc6ae948e472131c14ad0
|
4
|
+
data.tar.gz: b70e61b7811b3db4aa1d69e398debd20c651bc53
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9874a30f356f636e94e879f1149a20b6fe9449a0e1f4c42ae21be6dca79f531df0103d2408b0429dd5542fd0e5e1e3248648cdba0c2c9a6ebeacb5a0b2daf896
|
7
|
+
data.tar.gz: 7999236e1ba6ebaad1bcdb29ff1791e0397d3f7b08ed8f7824e007b63bdc37fbc31cf765b4a7ba206c40b2a303189085468488db37efb4035a490d62348af4b9
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
Copyright (c) 2006, 2007, 2008 The University of Texas at Austin
|
2
|
+
Copyright (c) 2009, University of Colorado at Boulder and Howard Hughes
|
3
|
+
Copyright (c) 2013, Brigham Young University
|
4
|
+
|
5
|
+
The above copyright holders are collectively designated "COPYRIGHT HOLDER"
|
6
|
+
|
7
|
+
Software by John T. Prince
|
8
|
+
|
9
|
+
By using this software the USER indicates that he or she has read, understood and will comply with the following:
|
10
|
+
|
11
|
+
COPYRIGHT HOLDER hereby grants USER permission to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of this software and its documentation for any purpose and without fee, provided that a full copy of this notice is included with the software and its documentation.
|
12
|
+
|
13
|
+
Title to copyright this software and its associated documentation shall at all times remain with COPYRIGHT HOLDER. No right is granted to use in advertising, publicity or otherwise any trademark, service mark, or the name of COPYRIGHT HOLDER.
|
14
|
+
|
15
|
+
This software and any associated documentation are provided "as is," and COPYRIGHT HOLDER MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESSED OR IMPLIED, INCLUDING THOSE OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT USE OF THE SOFTWARE, MODIFICATIONS, OR ASSOCIATED DOCUMENTATION WILL NOT INFRINGE ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER INTELLECTUAL PROPERTY RIGHTS OF A THIRD PARTY. COPYRIGHT HOLDER and associated Regents, officers, and employees shall not be liable under any circumstances for any direct, indirect, special, incidental, or consequential damages with respect to any claim by USER or any third party on account of or arising from the use, or inability to use, this software or its associated documentation, even if COPYRIGHT HOLDER has been advised of the possibility of those damages.
|
data/README.md
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
[![Gem Version](https://badge.fury.io/rb/histogram.png)](http://badge.fury.io/rb/histogram)
|
2
|
+
|
3
|
+
# histogram
|
4
|
+
|
5
|
+
Generates histograms similar to R's hist and numpy's histogram functions.
|
6
|
+
Inspired somewhat by [Richard Cotton's matlab implementation](http://www.mathworks.com/matlabcentral/fileexchange/21033-calculate-number-of-bins-for-histogram)
|
7
|
+
and the wikipedia [histogram article](http://en.wikipedia.org/wiki/Histogram).
|
8
|
+
|
9
|
+
## NOTE
|
10
|
+
|
11
|
+
versions <= 0.1.0 had a stupid bug in the Freedman-Diaconis method for finding
|
12
|
+
bins. So, if you weren't specifying your own number of bins or bin sizes,
|
13
|
+
then you may not have been getting the optimal bin size by default.
|
14
|
+
|
15
|
+
### Typical usage:
|
16
|
+
|
17
|
+
require 'histogram/array' # enables Array#histogram
|
18
|
+
|
19
|
+
data = [0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10]
|
20
|
+
# by default, uses Scott's method to calculate optimal number of bins
|
21
|
+
# and the bin values are midpoints between the bin edges
|
22
|
+
(bins, freqs) = data.histogram
|
23
|
+
# equivalent to: data.histogram(:scott, :bin_boundary => :avg)
|
24
|
+
|
25
|
+
### Multiple types of binning behavior:
|
26
|
+
|
27
|
+
# :scott, :fd, :sturges, or :middle
|
28
|
+
data.histogram(:fd) # use Freedman-Diaconis method to calc num bins
|
29
|
+
data.histogram(:middle) # (median value between the three methods)
|
30
|
+
(bins, freqs) = data.histogram(20) # use 20 bins
|
31
|
+
(bins, freqs) = data.histogram([-3,-1,4,5,6]) # custom bins
|
32
|
+
|
33
|
+
(bins, freqs) = data.histogram(10, :min => 2, :max => 12) # 10 bins with set min and max
|
34
|
+
|
35
|
+
# bins are midpoints, but can be set as minima
|
36
|
+
(bins, freqs) = data.histogram([-3,-1,4,5,6], :bin_boundary => :min) # custom bins with :min
|
37
|
+
|
38
|
+
# can also set the bin_width (which interpolates between the min and max of the set)
|
39
|
+
(bins, freqs) = data.histogram(:bin_width => 0.5)
|
40
|
+
|
41
|
+
### Multiple Datasets:
|
42
|
+
|
43
|
+
Sometimes, we want to create histograms where the bins are calculated based on
|
44
|
+
all the data sets. That way, the resulting frequencies will all line up:
|
45
|
+
|
46
|
+
# returns [bins, freq1, freq2 ...]
|
47
|
+
(bins, *freqs) = set1.histogram(30, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])
|
48
|
+
|
49
|
+
### Histograms with weights/fractions:
|
50
|
+
|
51
|
+
# histogramming with weights
|
52
|
+
data.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3])
|
53
|
+
|
54
|
+
### Works with NArray objects
|
55
|
+
|
56
|
+
require 'histogram/narray' # enables NArray#histogram
|
57
|
+
# if the calling object is an NArray, the output is two NArrays:
|
58
|
+
(bins, freqs) = NArray.float(20).random!(3).histogram(20)
|
59
|
+
# bins and freqs are both NArray.float objects
|
60
|
+
|
61
|
+
## Installation
|
62
|
+
|
63
|
+
gem install histogram
|
64
|
+
|
65
|
+
## See Also
|
66
|
+
|
67
|
+
[aggregate](http://github.com/josephruscio/aggregate), [rserve-client](http://rubygems.org/gems/rserve-client), [rsruby](http://github.com/alexgutteridge/rsruby)
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require "bundler/gem_tasks"

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file found under spec/.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# Running plain `rake` runs the specs.
task :default => :spec

require 'rdoc/task'

# `rake rdoc` writes the API documentation into rdoc/.
Rake::RDocTask.new do |rdoc|
  # A VERSION file is optional; strip it so a trailing newline cannot leak
  # into the generated title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  # The title names this project (it previously carried another gem's name).
  rdoc.title = "histogram #{version}".strip
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/bin/histogram
ADDED
@@ -0,0 +1,379 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
|
4
|
+
raise NotImplementedError, "this executable still needs some work"
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
require 'rubygems'
|
8
|
+
require 'histogram'
|
9
|
+
|
10
|
+
class Array
|
11
|
+
include Histogram
|
12
|
+
end
|
13
|
+
|
14
|
+
# Default settings; the OptionParser handlers below overwrite or add entries.
opt = {
  :type   => 'avg',
  :bins   => 5,
  :help   => false,
  :plot   => false,
  :labels => false,
  :with   => 'b',
}

progname = File.basename(__FILE__)

# Command line definition. Separators are interleaved with the switches so
# that the generated help text reads in this exact order.
opts = OptionParser.new do |parser|
  parser.banner = "usage: #{progname} [OPTIONS] val val ..."
  parser.separator ""
  parser.separator " outputs bins and frequencies in two rows (space delimited)"
  #parser.separator " of the bins. The number of bins is given by:"
  #parser.separator " ??? 10**(log10(#vals) - 1) ????"
  parser.separator ""
  parser.separator " - ' - ' to separate multiple datasets"
  parser.on("-t", "--type <min|avg>", "binning style [default: #{opt[:type]}]") do |value|
    opt[:type] = value
  end
  parser.on("-b", "--bins <num>", "number of bins [default: #{opt[:bins]}]") do |value|
    opt[:bins] = value
  end
  parser.separator " --bins <start,stop,[step]> user specified range for bins (*)"
  parser.separator " --bins <num num ...> user specified custom bins (**)"
  parser.on("-l", "--labels <set1,set2,...>", "label the datasets") do |value|
    opt[:labels] = value
  end
  parser.on("-c", "--cols", "prints 2 cols instead of rows (tab delim.)") do |value|
    opt[:cols] = value
  end
  parser.on("-i", "--input_cols", "interactively write or paste cols") do |value|
    opt[:input_cols] = value
  end
  parser.on("-r", "--input_rows", "interactively write or paste rows") do |value|
    opt[:input_rows] = value
  end
  parser.on("--file_rows <filename>", "rows inside of a file") do |value|
    opt[:file_rows] = value
  end
  parser.on("--yml-ar <filename>", "one array of data in yml") do |value|
    opt[:yml_ar] = value
  end
  parser.on("-p", "--plot <filename>", "plots the histogram in extension format (.png|.eps)") do |value|
    opt[:plot] = value
  end
  parser.separator " --plot terminal plots in X11 window"
  parser.on("-w", "--with {b|p|lp|l}", "plot with style: ") do |value|
    opt[:with] = value
  end
  parser.separator " b=boxes,p=points,lp=lines/points,l=lines"
  parser.on("-h", "--help", "prints arguments and examples") do |value|
    opt[:help] = value
  end

  parser.separator ""
  parser.separator "NOTES:"
  parser.separator " Do NOT use '--option=val' syntax ; instead use '--option val'"
  parser.separator " * 'stop' will always be included, even if it isn't stepped on"
  parser.separator " ** custom bins specified if > 1 args after --bins"
end
|
53
|
+
|
54
|
+
# Returns the usage examples shown after the option summary, as a single
# newline-joined String. +progname+ is interpolated into every command line.
def entry_examples(progname)
  [
    "",
    "ENTRY EXAMPLES:",
    " #{progname} 7 8 9 -b 2",
    " # vals: 7,8,9 ; number_of_bins: 2",
    " #{progname} -b 2 7 8 9",
    " # vals: [none] bins: 2 7 8 9 (<--probably not what you wanted)",
    " #{progname} -b 2 -t min 7 8 9",
    " # vals: 7,8,9 ; number_of_bins: 2 ; type: min",
    " #{progname} 7 8 9 -b 0 1 2 3",
    " # vals: 7,8,9 ; bins: 0 1 2 3",
    " #{progname} -b 0 1 2 3 -- 7 8 9",
    " # vals: 7,8,9 ; bins: 0 1 2 3",
    " #{progname} -i",
    " # [then paste in cols]",
    " #{progname} -r -b 10",
    " # number_of_bins: 10 ; [then paste in datasets as rows]",
    "",
    "PLOT EXAMPLES:",
    " #{progname} 7 8 9 -b 2 -p filename.png",
    " # creates a png image of the plot",
    " #{progname} 7 8 9 -b 2 -p filename.eps -w lp",
    " # creates an eps image of the plot with lines and points",
    " #{progname} 7 8 9 -b 2 -p terminal --with l",
    " # creates a plot in window with lines",
  ].join("\n")
end
|
82
|
+
|
83
|
+
# grab --bins (-b) option and all valid trailing values
|
84
|
+
# returns bins (as array) and args array stripped of --bins stuff
|
85
|
+
# Splits the raw argument list into the values that follow -b/--bins and
# everything else.
#
# After "-b" or "--bins", every argument is collected as a bin until the next
# option-like argument (one starting with "-", which includes the lone "-"
# dataset separator and "--"). Negative numbers such as "-3", "-1.5" or a range
# spec like "-5,5,1" are treated as values, not as options, so
# "-b -3 -1 4" yields three bins.
#
# argv:: Array of Strings (not modified)
#
# Returns [bins, newargv], where bins is the Array of collected bin Strings and
# newargv is argv without the --bins switch and its values.
def remove_bins(argv)
  number = /\A-?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/
  # true for a (comma separated list of) number(s), e.g. "-3" or "-5,5,2.5"
  numeric_value = lambda do |arg|
    pieces = arg.split(',')
    !pieces.empty? && pieces.all? { |piece| piece =~ number }
  end

  bins = []
  newargv = []
  pickup_bins = false
  argv.each do |arg|
    if arg =~ /^-/ && !numeric_value.call(arg)
      if arg == '-b' || arg == '--bins'
        pickup_bins = true
      else
        # any other option ends the run of bins and is passed through
        pickup_bins = false
        newargv << arg
      end
    elsif pickup_bins
      bins << arg
    else
      newargv << arg
    end
  end
  [bins, newargv]
end
|
106
|
+
|
107
|
+
# takes args like this:
|
108
|
+
# %w(23 43 - 432 - 42 43 32)
|
109
|
+
# returns each val as .to_f
|
110
|
+
# -> [[23, 43], [432], [42, 43, 32]]
|
111
|
+
# Groups command line values into datasets, using a lone "-" as the separator.
#
#   extract_vals(%w(23 43 - 432 - 42 43 32))
#   # => [[23.0, 43.0], [432.0], [42.0, 43.0, 32.0]]
#
# A single leading "-" is ignored. Every value is converted with String#to_f
# (so non-numeric text becomes 0.0). Always returns at least one (possibly
# empty) dataset. The argument is left untouched.
def extract_vals(argv)
  args = argv.first == '-' ? argv.drop(1) : argv
  args.each_with_object([[]]) do |val, datasets|
    if val == '-'
      datasets << []
    else
      datasets.last << val.to_f
    end
  end
end
|
128
|
+
|
129
|
+
|
130
|
+
# returns as arrays the datasets given
|
131
|
+
# cols is limited to two columns
|
132
|
+
# Prompts for, then reads, datasets from standard input (via Kernel#gets)
# until an empty line or end of input.
#
# cols:: true  -> input is one or two whitespace-separated columns; returns
#                 [first_column] or [first_column, second_column]
#        false -> every input line is one dataset; returns an Array of rows
#
# Values are returned as Strings. Whitespace-only lines and empty fields are
# ignored instead of being recorded as nil / "" (which would later turn into
# spurious 0.0 data points).
def get_input_vals(cols=true)
  if cols
    puts "==> Paste (or enter) values as a single column."
    puts "==> OR paste bins, then values as two columns."
    puts "==> [multiple datasets can be input on command line,"
    puts "==> or as rows]"
  else
    puts "==> Paste (or enter) values (multiple datasets OK) as rows"
    puts "==> The first row will be considered the bins unless"
    puts "==> the bins were already specified on the command line"
  end

  lines = []
  while (string = gets)
    # a line without any character before its newline ends the input
    break unless string =~ /.+/
    lines << string
  end

  rows = lines.map { |line| line.chomp.split(/\s+/) }

  if cols
    first_col = []
    second_col = []
    rows.each do |fields|
      # fields[0] is nil for a whitespace-only line and '' for an indented one
      first_col << fields[0] unless fields[0].nil? || fields[0] == ''
      second_col << fields[1] if fields[1]
    end
    datasets = [first_col]
    datasets << second_col if second_col.size > 0
    datasets
  else
    rows.map { |fields| fields.reject { |field| field == '' } }.
         reject { |fields| fields.empty? }
  end
end
|
174
|
+
|
175
|
+
|
176
|
+
# escapes any ' chars
|
177
|
+
# Returns a copy of +string+ in which every single quote (') is preceded by a
# backslash. The argument itself is not modified.
#
# NOTE(review): gnuplot's own manual describes doubling the quote ('') inside
# single-quoted strings; the backslash form is kept here because it is the
# existing behavior -- confirm against the gnuplot version in use.
def escape_to_gnuplot(string)
  # block form so the replacement is taken literally (no backreference parsing)
  string.gsub("'") { "\\'" }
end
|
186
|
+
|
187
|
+
# Converts a "start,stop[,step]" command line argument into an Array of bins
# by handing the three numbers to create_bins. The step defaults to 1.
#
# Each field is converted with to_f. Aborts the program (with a message on
# stderr) when more than three fields are given, when stop is less than start,
# or when step is not greater than zero.
def bin_range_arg_to_custom_bin_array(arg)
  fields = arg.split(',')
  if fields.size > 3 ; abort "bin range takes at most start,stop,step" end
  fields[2] ||= 1 # no step? then its one
  (start, stop, step) = fields.map { |v| v.to_f }
  if stop < start ; abort "stop cannot be less than start" end
  # a zero/negative step would otherwise surface as a raw ArgumentError
  unless step > 0 ; abort "step must be greater than zero" end
  create_bins(start, stop, step)
end
|
195
|
+
|
196
|
+
# returns an array of bins
|
197
|
+
# stop is always included, even if it is not stepped on
|
198
|
+
# Builds the list of bins start, start+step, ... for every value below +stop+,
# then appends +stop+ itself unless it is already the final entry.
def create_bins(start, stop, step)
  edges = []
  (start...stop).step(step) { |edge| edges.push(edge) }
  edges.push(stop) unless edges.last == stop
  edges
end
|
206
|
+
|
207
|
+
# takes an array of bins and array of vals
|
208
|
+
# vals can be an array of arrays (multiple histograms)
|
209
|
+
# should be given as single array if its a single set
|
210
|
+
# with = {b|l|lp|p}
|
211
|
+
# boxes,lines,linespoints,points
|
212
|
+
# Plots one or more frequency series against shared bins through the Gnuplot
# bindings.
#
# bins::     Array of bin positions (x values)
# vals_ar::  Array of frequency Arrays, one per dataset (y values)
# filename:: 'terminal' for an x11 window, otherwise an output file; names
#            ending in .eps or .png select that format, anything else is
#            written as png to "<filename>.png". The caller's String is never
#            modified.
# bin_type:: shown in the x axis label
# with::     'b' boxes, 'l' lines, 'lp' linespoints, 'p' points (any other
#            value leaves the dataset style unset)
# labels::   optional Array of titles, matched to datasets by index
def plot_histogram(bins, vals_ar, filename, bin_type, with, labels=nil)
  plot_with =
    case with
    when 'b'  then 'boxes fs solid'
    when 'l'  then 'lines'
    when 'lp' then 'linespoints'
    when 'p'  then 'points'
    end

  output_type, terminal_string =
    case filename
    when 'terminal'
      [:terminal, "x11"]
    when /\.eps$/
      [:eps, "postscript eps noenhanced color"]
    when /\.png$/
      [:png, "png noenhanced"]
    else
      # default to png; build a new String rather than appending in place
      filename = "#{filename}.png"
      [:png, "png noenhanced"]
    end

  Gnuplot.open do |gp|
    Gnuplot::Plot.new( gp ) do |plot|
      plot.terminal terminal_string
      plot.output(filename) unless output_type == :terminal
      plot.xlabel "bins (type:#{bin_type})"
      plot.ylabel "occurences"
      plot.style "line 1 lt 1"
      plot.style "line 2 lt 12"
      vals_ar.each_with_index do |vals, i|
        plot.data << Gnuplot::DataSet.new([bins, vals]) do |ds|
          ds.with = plot_with
          if labels && labels[i]
            ds.title = labels[i]
          else
            ds.notitle
          end
        end
      end
    end
  end
end
|
271
|
+
|
272
|
+
|
273
|
+
|
274
|
+
#################################################
|
275
|
+
# MAIN:
|
276
|
+
#################################################
|
277
|
+
# --bins takes a variable number of values, so it is stripped out by hand
# before OptionParser sees the remaining arguments.
(bins, newargv) = remove_bins(ARGV.to_a)
# NOTE(review): presumably cleared so that Kernel#gets (used for interactive
# input) reads standard input instead of treating arguments as file names.
ARGV.clear

# parse the options out
opts.parse!(newargv)

if opt[:help]
  puts opts
  puts entry_examples(progname)
  exit
end

# get interactive values or pull them off the command line
vals_ar =
  if opt[:input_cols]
    datasets = get_input_vals()
    if datasets.size == 2
      # two columns: the first holds the bins, the second the values
      bins = datasets[0]
      [datasets[1]]
    else
      [datasets[0]]
    end
  elsif opt[:input_rows]
    datasets = get_input_vals(false)
    ## set the bins to the first dataset if none given
    bins = datasets.shift if bins.size == 0
    datasets
  elsif opt[:file_rows]
    IO.readlines(opt[:file_rows]).
      select {|v| v =~ /\d/ && v !~ /^#/ }.
      map {|v| v.chomp.split(/\s+/) }
  elsif opt[:yml_ar] # just one array of data
    require 'yaml'
    [YAML.load_file(opt[:yml_ar])]
  else
    extract_vals(newargv)
  end

# print help if we don't have any values given (also covers the case where
# no dataset exists at all, which used to crash on nil)
if vals_ar.empty? || vals_ar[0].nil? || vals_ar[0].size == 0
  puts opts
  exit
end

# prep the bins (if one value, then its the # of bins and not a list of bins)
bins =
  if bins.size == 0
    opt[:bins] ## should be default value since we already swipe the args
  elsif bins.size == 1
    bin_arg = bins.first
    if bin_arg =~ /,/
      ## commas specify a range
      bin_range_arg_to_custom_bin_array(bin_arg).map {|v| v.to_f }
    else
      ## no commas? then this is the number of bins
      bin_arg.to_i
    end
  else # custom array
    bins.map {|v| v.to_f }
  end

vec_ar = vals_ar.map do |vals|
  vals.map {|v| v.to_f }
end
first_vec = vec_ar.shift

# Histogram#histogram takes (bins, options): the binning style goes in
# :bin_boundary and the remaining datasets in :other_sets.
output = first_vec.histogram(bins, :bin_boundary => opt[:type].to_sym, :other_sets => vec_ar)
# first ar is the bins, rest are the frequencies

# convert into arrays
output = output.map {|vec| vec.to_a }

# output: one line per series, or (with --cols) one line per bin
rows = opt[:cols] ? output.transpose : output
rows.each do |row|
  puts row.join(' ')
end

labels = opt[:labels] ? opt[:labels].split(',') : nil

bins = output.shift
if opt[:plot]
  require 'gnuplot'
  plot_histogram( bins, output, opt[:plot], opt[:type], opt[:with], labels)
end
|
378
|
+
|
379
|
+
|
data/histogram.gemspec
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# coding: utf-8
# Make lib/ loadable so the version constant can be read without installing.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'histogram/version'

Gem::Specification.new do |spec|
  spec.name          = "histogram"
  spec.version       = Histogram::VERSION
  spec.authors       = ["John T. Prince"]
  spec.email         = ["jtprince@gmail.com"]
  spec.description   = %q{gives objects the ability to 'histogram' in several useful ways}
  spec.summary       = %q{histograms data in different ways}
  # NOTE(review): homepage is blank -- fill in the project URL.
  spec.homepage      = ""
  # NOTE(review): confirm this identifier matches the text of the LICENSE file.
  spec.license       = "MIT"

  # Package whatever git tracks; executables and tests are picked by path.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Each entry is "name" or "name requirement"; it is split once on the first
  # space into the arguments for add_development_dependency.
  [ "bundler ~> 1.3",
    "rake ~> 10.1.0",
    "simplecov ~> 0.7.1",
    "rspec ~> 2.13.0",
    "narray",
  ].each do |argline|
    # parenthesized so the splat cannot be read as a multiplication
    spec.add_development_dependency(*argline.split(' ', 2))
  end
end
|
data/lib/histogram.rb
ADDED
@@ -0,0 +1,348 @@
|
|
1
|
+
|
2
|
+
class NArray
|
3
|
+
end
|
4
|
+
|
5
|
+
module Histogram
|
6
|
+
DEFAULT_BIN_METHOD = :scott
|
7
|
+
DEFAULT_QUARTILE_METHOD = :moore_mccabe
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# returns (min, max)
|
11
|
+
# Returns [min, max] of +obj+.
#
# Arrays use Array#minmax. Anything else is scanned once with #each, seeded
# with obj[0], comparing elements with < and >.
def minmax(obj)
  return obj.minmax if obj.is_a?(Array)

  lowest = highest = obj[0]
  obj.each do |val|
    lowest = val if val < lowest
    highest = val if val > highest
  end
  [lowest, highest]
end
|
24
|
+
|
25
|
+
# returns (mean, standard_dev)
|
26
|
+
# if size == 0 returns [nil, nil]
|
27
|
+
def sample_stats(obj)
|
28
|
+
_len = obj.size
|
29
|
+
return [nil, nil] if _len == 0
|
30
|
+
_sum = 0.0 ; _sum_sq = 0.0
|
31
|
+
obj.each do |val|
|
32
|
+
_sum += val
|
33
|
+
_sum_sq += val * val
|
34
|
+
end
|
35
|
+
std_dev = _sum_sq - ((_sum * _sum)/_len)
|
36
|
+
std_dev /= ( _len > 1 ? _len-1 : 1 )
|
37
|
+
[_sum.to_f/_len, Math.sqrt(std_dev)]
|
38
|
+
end
|
39
|
+
|
40
|
+
# opts:
|
41
|
+
#
|
42
|
+
# defaults:
|
43
|
+
# :method => :moore_mccabe, :tukey
|
44
|
+
# :sorted => false
|
45
|
+
#
|
46
|
+
# Returns the interquartile range of +obj+ as a Float: the median of the upper
# half minus the median of the lower half.
#
# opts:
#   :method => :moore_mccabe (default) - the middle element of an odd-sized
#                                        set belongs to neither half
#              :tukey                  - the middle element of an odd-sized
#                                        set belongs to both halves
#   :sorted => false (default); pass true when obj is already sorted
#
# Fewer than two elements cannot be split into halves, so 0.0 is returned.
# Raises ArgumentError for an unknown :method.
def iqrange(obj, opts={})
  opt = {method: DEFAULT_QUARTILE_METHOD, sorted: false}.merge( opts )
  srted = opt[:sorted] ? obj : obj.sort
  sz = srted.size

  # hi_idx: first index of the upper half; lo_idx: last index of the lower half
  hi_idx = sz / 2
  lo_idx =
    case opt[:method]
    when :tukey
      sz.even? ? hi_idx - 1 : hi_idx
    when :moore_mccabe
      hi_idx += 1 unless sz.even?
      (sz / 2) - 1
    else
      raise ArgumentError, "method must be :tukey or :moore_mccabe"
    end

  return 0.0 if sz < 2

  (median(srted[hi_idx..-1]) - median(srted[0..lo_idx])).to_f
end
|
66
|
+
|
67
|
+
# finds median on a pre-sorted array
|
68
|
+
# Median of an already sorted, indexable collection, returned as a Float: the
# mean of the two central elements (which coincide for an odd size).
def median(sorted)
  count = sorted.size
  lower_mid = sorted[(count - 1) / 2]
  upper_mid = sorted[count / 2]
  (lower_mid + upper_mid) / 2.0
end
|
71
|
+
end
|
72
|
+
|
73
|
+
# returns(integer) takes :scott|:sturges|:fd|:middle
|
74
|
+
#
|
75
|
+
# middle is the median between the other three values
|
76
|
+
#
|
77
|
+
# inspired by {Richard Cotton's matlab
|
78
|
+
# implementation}[http://www.mathworks.com/matlabcentral/fileexchange/21033-calculate-number-of-bins-for-histogram]
|
79
|
+
# and the {histogram page on
|
80
|
+
# wikipedia}[http://en.wikipedia.org/wiki/Histogram]
|
81
|
+
# Returns the suggested number of bins (an Integer >= 1) for the receiver.
#
# methd:
#   :scott   - range / (3.5 * stddev * n^(-1/3))
#   :sturges - 1 + log2(n)
#   :fd      - Freedman-Diaconis: range / (2 * iqrange * n^(-1/3))
#   :middle  - the median of the three results above
# quartile_method is handed to Histogram.iqrange (used by :fd and :middle).
#
# The raw estimate is rounded up. When it is not a positive finite number
# (for example all values are identical, so the spread is zero) 1 is returned.
# Raises ArgumentError for an unknown method.
def number_of_bins(methd=DEFAULT_BIN_METHOD, quartile_method=DEFAULT_QUARTILE_METHOD)
  if methd == :middle
    return [:scott, :sturges, :fd].map {|v| number_of_bins(v, quartile_method) }.sort[1]
  end

  count = self.size
  nbins =
    case methd
    when :scott
      range = (self.max - self.min).to_f
      (_mean, stddev) = Histogram.sample_stats(self)
      range / ( 3.5*stddev*(count**(-1.0/3)) )
    when :sturges
      1 + Math::log(count, 2)
    when :fd
      range = (self.max - self.min).to_f
      # the rule yields a bin *width*; the bin count is the range over it
      bin_width = 2 * Histogram.iqrange(self, method: quartile_method) * (count**(-1.0/3))
      range / bin_width
    else
      raise ArgumentError, "method must be :scott, :sturges, :fd or :middle"
    end

  # NaN (0/0), +/-Infinity (x/0, log of 0) and non-positive estimates all
  # collapse to a single bin
  return 1 unless nbins.finite? && nbins > 0
  nbins.ceil.to_i
end
|
100
|
+
|
101
|
+
# Returns [bins, freqs]
|
102
|
+
#
|
103
|
+
# histogram(bins, opts)
|
104
|
+
# histogram(opts)
|
105
|
+
#
|
106
|
+
# Options:
|
107
|
+
#
|
108
|
+
# :bins => :scott Scott's method range/(3.5σ * n^(-1/3)) (default)
|
109
|
+
# :fd Freedman-Diaconis range/(2*iqrange *n^(-1/3))
|
110
|
+
# :sturges Sturges' method log_2(n) + 1 (overly smooth for n > 200)
|
111
|
+
# :middle the median between :fd, :scott, and :sturges
|
112
|
+
# <Integer> give the number of bins
|
113
|
+
# <Array> specify the bins themselves
|
114
|
+
#
|
115
|
+
# :bin_boundary => :avg boundary is the avg between bins (default)
|
116
|
+
# :min bins specify the minima for binning
|
117
|
+
#
|
118
|
+
# :bin_width => <float> width of a bin (overrides :bins)
|
119
|
+
# :min => <float> # explicitly set the min
|
120
|
+
# :max => <float> # explicitly set the max val
|
121
|
+
#
|
122
|
+
# :other_sets => an array of other sets to histogram
|
123
|
+
#
|
124
|
+
# Examples
|
125
|
+
#
|
126
|
+
# require 'histogram/array'
|
127
|
+
# ar = [-2,1,2,3,3,3,4,5,6,6]
|
128
|
+
# # these return: [bins, frequencies]
|
129
|
+
# ar.histogram(20) # use 20 bins
|
130
|
+
# ar.histogram([-3,-1,4,5,6], :bin_boundary => :avg) # custom bins
|
131
|
+
#
|
132
|
+
# # returns [bins, freq1, freq2 ...]
|
133
|
+
# (bins, *freqs) = ar.histogram(30, :bin_boundary => :avg, :other_sets => [[3,3,4,4,5], [-1,0,0,3,3,6]])
|
134
|
+
# (ar_freqs, other1, other2) = freqs
|
135
|
+
#
|
136
|
+
# # histogramming with weights
|
137
|
+
# w_weights.histogram(20, :weights => [3,3,8,8,9,9,3,3,3,3])
|
138
|
+
#
|
139
|
+
# # with NArray
|
140
|
+
# require 'histogram/narray'
|
141
|
+
# NArray.float(20).random!(3).histogram(20)
|
142
|
+
# # => [bins, freqs] # are both NArray.float objects
|
143
|
+
#
|
144
|
+
# Notes
|
145
|
+
#
|
146
|
+
# * The lowest bin will be min, highest bin the max unless array given.
|
147
|
+
# * Assumes that bins are increasing.
|
148
|
+
# * :avg means that the boundary between the specified bins is at the avg
|
149
|
+
# between the bins (rounds up )
|
150
|
+
# * :min means that to fit in the bin it must be >= the bin and < the next
|
151
|
+
# (so, values lower than the first bin are not included, but all values
|
152
|
+
# higher than the last bin are included). Current implementation of custom
|
153
|
+
# bins is slow.
|
154
|
+
# * if other_sets are supplied, the same bins will be used for all the sets.
|
155
|
+
# It is useful if you just want a certain number of bins and for the sets
|
156
|
+
# to share the exact same bins. In this case returns [bins, freqs(caller),
|
157
|
+
# freqs1, freqs2 ...]
|
158
|
+
# * Can also deal with weights. :weights should provide parallel arrays to
|
159
|
+
# the caller and any :other_sets provided.
|
160
|
+
# Returns [bins, freqs] or, with :other_sets, [bins, freqs, freqs1, freqs2 ...].
#
#   histogram(bins, opts)
#   histogram(bins)
#   histogram(opts)
#   histogram
#
# bins may be a Symbol (handed to number_of_bins), an Integer (number of
# equal-width bins) or an Array/NArray of custom bins. Recognized options:
# :bins, :bin_boundary (:avg or :min), :bin_width, :min, :max, :other_sets,
# :weights.
#
# Edge cases:
# * With a number of bins, values below the range land in the first bin and
#   values above it in the last bin (relevant when :min/:max are given).
# * With a number of bins and a zero-width range (all values equal), the range
#   is widened by 0.5 on each side.
# * A single custom bin receives every value in :avg mode.
#
# Raises ArgumentError for more than two arguments, an unknown :bin_boundary,
# or an empty list of custom bins.
def histogram(*args)
  # zero-filled frequency container of the same kind as obj
  make_freqs_proc = lambda do |obj, len|
    if obj.is_a?(Array)
      Array.new(len, 0.0)
    elsif obj.is_a?(NArray)
      NArray.float(len)
    end
  end

  bins = nil
  opts = {}
  case args.size
  when 0
    # defaults only
  when 1
    if args.first.is_a?(Hash)
      opts = args.first
    else
      bins = args.first
    end
  when 2
    (bins, opts) = args
  else
    raise ArgumentError, "accepts no more than 2 args"
  end

  opts = ({ :bin_boundary => :avg, :other_sets => [] }).merge(opts)

  bins = opts[:bins] if opts[:bins]
  bins ||= DEFAULT_BIN_METHOD

  bin_boundary = opts[:bin_boundary]
  unless bin_boundary == :avg || bin_boundary == :min
    raise ArgumentError, ":bin_boundary must be :avg or :min"
  end
  other_sets = opts[:other_sets]

  bins_array_like = bins.kind_of?(Array) || bins.kind_of?(NArray) || opts[:bin_width]
  all = [self] + other_sets

  # a Symbol names a bin-count heuristic; not needed when :bin_width is about
  # to replace the bins anyway
  if bins.is_a?(Symbol) && !opts[:bin_width]
    bins = number_of_bins(bins)
  end

  have_frac_freqs = opts[:weights] ? true : false
  weights =
    if have_frac_freqs
      # a flat list of numbers is the weight set for the caller alone
      opts[:weights][0].is_a?(Numeric) ? [ opts[:weights] ] : opts[:weights]
    else
      []
    end

  # we need to know the limits of the bins if we need to define our own bins
  calc_min = calc_max = nil
  if (opts[:bin_width] || !bins_array_like) && !(opts[:min] && opts[:max])
    (mins, maxs) = all.map {|ar| Histogram.minmax(ar) }.transpose
    calc_min = mins.min
    calc_max = maxs.max
  end
  _min = opts[:min] || calc_min
  _max = opts[:max] || calc_max

  if opts[:bin_width]
    bins = []
    _min.step(_max, opts[:bin_width]) {|v| bins << v }
  end

  _bins = nil
  freqs_ar = nil
  if bins_array_like
    ########################################################
    # ARRAY BINS:
    ########################################################
    raise ArgumentError, "bins cannot be empty" if bins.size == 0

    _bins =
      if bins.is_a?(Array)
        bins.map {|v| v.to_f }
      elsif bins.is_a?(NArray)
        bins.to_f
      end

    if bin_boundary == :avg
      # boundaries sit halfway between neighboring bins (same for every set)
      break_points = (0...(bins.size - 1)).map do |i|
        (bins[i].to_f + bins[i+1].to_f) / 2.0
      end

      freqs_ar = all.zip(weights).map do |xvals, yvals|
        _freqs = make_freqs_proc.call(xvals, bins.size)
        (0...(xvals.size)).each do |i|
          val = xvals[i]
          height = have_frac_freqs ? yvals[i] : 1
          if break_points.empty? || val < break_points.first
            _freqs[0] += height
          elsif val >= break_points.last
            _freqs[-1] += height
          else
            slot = (0...(break_points.size - 1)).find do |j|
              val >= break_points[j] && val < break_points[j+1]
            end
            _freqs[slot + 1] += height if slot
          end
        end
        _freqs
      end
    else # :min
      freqs_ar = all.zip(weights).map do |xvals, yvals|
        _freqs = make_freqs_proc.call(xvals, bins.size)
        (0...(xvals.size)).each do |i|
          val = xvals[i]
          height = have_frac_freqs ? yvals[i] : 1
          # walk up the bins while the value is >= the bin; stop at the first
          # bin above the value once one has matched
          last_found_j = false
          (0...(_bins.size)).each do |j|
            if val >= _bins[j]
              last_found_j = j
            elsif last_found_j
              break
            end
          end
          if last_found_j ; _freqs[last_found_j] += height ; end
        end
        _freqs
      end
    end
  else
    ########################################################
    # NUMBER OF BINS:
    ########################################################
    lo = _min
    hi = _max
    if hi == lo
      # zero-width range: widen it so the scaling factor stays finite
      lo = lo - 0.5
      hi = hi + 0.5
    end

    # Create the scaling factor
    dmin = lo.to_f
    conv = bins.to_f/(hi - lo)

    _bins =
      if self.is_a?(Array)
        Array.new(bins)
      elsif self.is_a?(NArray)
        NArray.float(bins)
      end

    freqs_ar = all.zip(weights).map do |xvals, yvals|
      _freqs = make_freqs_proc.call(xvals, bins)

      # Create the histogram:
      (0...(xvals.size)).each do |i|
        val = xvals[i]
        height = have_frac_freqs ? yvals[i] : 1
        index = ((val-lo)*conv).floor
        # the max itself (index == bins) and anything outside the range are
        # clamped to the nearest edge bin; a negative index must never reach
        # the container because it would count from the end
        index = bins - 1 if index >= bins
        index = 0 if index < 0
        _freqs[index] += height
      end
      _freqs
    end

    # Create the bins:
    iconv = 1.0/conv
    offset = (bin_boundary == :avg) ? 0.5 : 0.0
    (0...bins).each do |i|
      _bins[i] = ((i+offset) * iconv) + dmin
    end
  end
  [_bins] + freqs_ar
end
|
340
|
+
|
341
|
+
# Returns the arithmetic mean of +one+ and +two+ as a Float.
# Both arguments are converted with +to_f+ before being added.
def avg_ints(one, two) # :nodoc:
  total = one.to_f + two.to_f
  total / 2.0
end
|
344
|
+
|
345
|
+
end
|
346
|
+
|
347
|
+
|
348
|
+
|
@@ -0,0 +1,162 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
require 'histogram'
|
4
|
+
|
5
|
+
# Custom matcher: compares +actual+ with +expected+ after every element of
# both collections has been converted to a Float and rounded to 8 decimals.
RSpec::Matchers.define :be_within_rounding_error_of do |expected|
  match do |actual|
    rounded = [actual, expected].map do |values|
      values.collect { |value| value.to_f.round(8) }
    end
    got, want = rounded
    # The comparison itself is a nested `should ==` on plain arrays.
    got.to_a.should == want.to_a
  end
end
|
11
|
+
|
12
|
+
# Shared behaviour for any object that responds to #histogram.
# The including group must define obj0..obj5 (via `let`).
shared_examples 'something that can histogram' do
  it 'makes histograms with the specified number of bins' do
    bins, freqs = obj0.histogram(5)
    # both returned collections keep the class of the receiver
    bins.should be_a(obj0.class)
    freqs.should be_a(obj0.class)
    bins.should be_within_rounding_error_of [1, 3, 5, 7, 9]
    freqs.should be_within_rounding_error_of [2, 2, 2, 2, 3]
  end

  it 'returns bins as the min boundary if given that option' do
    bins, freqs = obj0.histogram(5, bin_boundary: :min)
    bins.should be_within_rounding_error_of [0, 2, 4, 6, 8]
    freqs.should be_within_rounding_error_of [2, 2, 2, 2, 3]
  end

  it 'makes histograms when given the bins' do
    bins, freqs = obj1.histogram([1, 3, 5, 7, 9])
    bins.should be_within_rounding_error_of [1, 3, 5, 7, 9]
    freqs.should be_within_rounding_error_of [3, 1, 1, 2, 3]
  end

  it 'interprets bins as the min boundary when given the bin_boundary option' do
    bins, freqs = obj2.histogram([1, 3, 5, 7, 9], bin_boundary: :min)
    bins.should be_within_rounding_error_of [1, 3, 5, 7, 9]
    freqs.should be_within_rounding_error_of [3, 0, 2, 2, 3]
  end

  it 'can histogram multiple sets' do
    bins, freq1, freq2, freq3 = obj3.histogram([1, 2, 3, 4], other_sets: [obj4, obj4])
    bins.should be_within_rounding_error_of [1, 2, 3, 4]
    freq1.should be_within_rounding_error_of [2.0, 2.0, 2.0, 3.0]
    freq2.should be_within_rounding_error_of [0.0, 5.0, 0.0, 1.0]
    # obj4 was passed twice, so the third frequency set mirrors the second
    freq3.should be_within_rounding_error_of freq2
  end

  it 'works with a given min val' do
    bins, freqs = obj5.histogram(4, min: 2, bin_boundary: :min)
    bins.should be_within_rounding_error_of [2.0, 3.5, 5.0, 6.5]
    freqs.should be_within_rounding_error_of [4.0, 1.0, 2.0, 3.0]
  end

  it 'works with a given max val' do
    bins, freqs = obj5.histogram(4, max: 7, bin_boundary: :min)
    bins.should be_within_rounding_error_of [1.0, 2.5, 4.0, 5.5]
    freqs.should be_within_rounding_error_of [2.0, 3.0, 2.0, 3.0]
  end

  it 'works with given min/max vals' do
    bins, freqs = obj5.histogram(4, min: 2, max: 7, bin_boundary: :min)
    bins.should be_within_rounding_error_of [2.0, 3.25, 4.5, 5.75]
    freqs.should be_within_rounding_error_of [4.0, 1.0, 1.0, 4.0]
  end

  it 'can use equal weights' do
    weights = Array.new(obj1.size, 3)
    bins, freqs = obj1.histogram([1, 3, 5, 7, 9], weights: weights)
    # every value weighs 3, so each unweighted count is tripled
    tripled = [3, 1, 1, 2, 3].map { |count| count * 3 }
    bins.should be_within_rounding_error_of [1, 3, 5, 7, 9]
    freqs.should be_within_rounding_error_of tripled
  end

  it 'can use unequal weights' do
    weights = [10, 0, 0, 0, 50, 0, 0, 0, 0.2, 0.2]
    bins, freqs = obj1.histogram([1, 3, 5, 7, 9], weights: weights)
    bins.should be_within_rounding_error_of [1, 3, 5, 7, 9]
    freqs.should be_within_rounding_error_of [10, 0, 50, 0, 0.4]
  end
end
|
88
|
+
|
89
|
+
# Specs for the Histogram mixin: the shared histogram behaviour is run
# against plain Arrays and (when the narray library loads) NArrays, followed
# by the bin-count and interquartile-range helpers.
describe Histogram do
  # Raw fixtures keyed by the `let` name the shared examples refer to.
  # They stay as plain arrays here; each `let` below converts them lazily.
  # (An earlier `tmp.each { ... }` pass discarded its block result and so
  # never changed these values; it has been dropped.)
  data = {
    obj0: (0..10).to_a,
    obj1: [0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9],
    obj2: [-1, 0, 1, 1.5, 2.0, 5.0, 6.0, 7, 8, 9, 9, 10],
    obj3: [1, 1, 2, 2, 3, 3, 4, 4, 4],
    obj4: [2, 2, 2, 2, 2, 4],
    obj5: [1,2,3,3,3,4,5,6,7,8],
  }

  let(:data) { data }

  data.each do |obj, ar|
    let(obj) { ar.map(&:to_f).extend(Histogram) }
  end

  describe Array do
    it_behaves_like 'something that can histogram'
  end

  # A failed `require` raises LoadError, which is a ScriptError and is NOT
  # caught by a bare `rescue` (that only handles StandardError), so the
  # exception class must be named for the fallback to work.
  have_narray =
    begin
      require 'narray'
      # the result of this check is the value of the begin block
      NArray.respond_to?(:to_na)
    rescue LoadError
      false
    end

  # Described by name rather than by constant: referencing NArray here would
  # raise NameError at load time when the library is not installed.
  describe 'NArray', :pending => !have_narray do
    data.each do |obj, ar|
      let(obj) { NArray.to_na(ar).to_f.extend(Histogram) }
    end
    it_behaves_like 'something that can histogram'
  end

  describe 'calculating bins' do
    let(:even) {
      [1,2,3,4,5,6,7,8].extend(Histogram)
    }
    let(:odd) { even[0..-2] }

    let(:data_array) {
      [0,1,2,2,2,2,2,3,3,3,3,3,3,3,3,3,5,5,9,9,10,20,15,15,15,16,17].extend(Histogram)
    }

    it 'calculates :sturges, :scott, :fd, or :middle' do
      answers = [6,3,6,6]
      [:sturges, :scott, :fd, :middle].zip(answers) do |mth, answ|
        # These answers are **frozen**: they have not yet been checked
        # against other implementations. However, I've meticulously gone
        # over the implementation of sturges, scott and fd and am confident
        # they are correct.
        # Note: there is some room for disagreement about how an
        # interquartile range is calculated (only 2 simple methods are
        # implemented here). Also, I take the ceil of the resulting value
        # and others may round.
        data_array.number_of_bins(mth).should == answ
      end
    end

    it 'calculates the interquartile range via moore_mccabe' do
      Histogram.iqrange(even, method: :moore_mccabe).should == 4.0
      Histogram.iqrange(odd, method: :moore_mccabe).should == 4.0
    end

    it 'calculates the interquartile range via tukey' do
      Histogram.iqrange(even, method: :tukey).should == 4.0
      Histogram.iqrange(odd, method: :tukey).should == 3.0
    end

  end
end
|
162
|
+
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start
|
3
|
+
|
4
|
+
require 'rspec'
|
5
|
+
|
6
|
+
require 'rspec/core/formatters/progress_formatter'
|
7
|
+
# Progress formatter that reports a pending example tersely: it prints only
# a single pending-colored '*'.
class QuietPendingFormatter < RSpec::Core::Formatters::ProgressFormatter
  def example_pending(example)
    marker = pending_color('*')
    output.print(marker)
  end
end
|
13
|
+
|
14
|
+
require 'rspec/core/formatters/documentation_formatter'
|
15
|
+
# Documentation formatter that reports a pending example as one
# pending-colored line containing the example's pending message.
class QuietPendingDocFormatter < RSpec::Core::Formatters::DocumentationFormatter
  def example_pending(example)
    reason = example.execution_result[:pending_message]
    output.puts(pending_color("<pending>: #{reason}"))
  end
end
|
20
|
+
|
21
|
+
# Suite-wide RSpec settings: bare symbols act as `true` metadata, output goes
# through the quiet documentation formatter, and color is enabled.
RSpec.configure do |rspec|
  rspec.treat_symbols_as_metadata_keys_with_true_values = true
  rspec.formatter = QuietPendingDocFormatter
  rspec.color = true
end
|
26
|
+
|
27
|
+
# Path of the "testfiles" directory that sits next to this helper file.
TESTFILES = "#{File.dirname(__FILE__)}/testfiles"
|
28
|
+
|
29
|
+
|
metadata
ADDED
@@ -0,0 +1,131 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: histogram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- John T. Prince
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ~>
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 10.1.0
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ~>
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 10.1.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: simplecov
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.7.1
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.7.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.13.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.13.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: narray
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '>='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '>='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
description: gives objects the ability to 'histogram' in several useful ways
|
84
|
+
email:
|
85
|
+
- jtprince@gmail.com
|
86
|
+
executables:
|
87
|
+
- histogram
|
88
|
+
extensions: []
|
89
|
+
extra_rdoc_files: []
|
90
|
+
files:
|
91
|
+
- .gitignore
|
92
|
+
- Gemfile
|
93
|
+
- LICENSE
|
94
|
+
- README.md
|
95
|
+
- Rakefile
|
96
|
+
- bin/histogram
|
97
|
+
- histogram.gemspec
|
98
|
+
- lib/histogram.rb
|
99
|
+
- lib/histogram/array.rb
|
100
|
+
- lib/histogram/narray.rb
|
101
|
+
- lib/histogram/plot.rb
|
102
|
+
- lib/histogram/version.rb
|
103
|
+
- spec/histogram_spec.rb
|
104
|
+
- spec/spec_helper.rb
|
105
|
+
homepage: ''
|
106
|
+
licenses:
|
107
|
+
- MIT
|
108
|
+
metadata: {}
|
109
|
+
post_install_message:
|
110
|
+
rdoc_options: []
|
111
|
+
require_paths:
|
112
|
+
- lib
|
113
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - '>='
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: '0'
|
123
|
+
requirements: []
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 2.0.3
|
126
|
+
signing_key:
|
127
|
+
specification_version: 4
|
128
|
+
summary: histograms data in different ways
|
129
|
+
test_files:
|
130
|
+
- spec/histogram_spec.rb
|
131
|
+
- spec/spec_helper.rb
|