histograffle 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +21 -0
- data/LICENSE +20 -0
- data/README.textile +86 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/histograffle.rb +114 -0
- data/test/helper.rb +10 -0
- data/test/test_histograffle.rb +7 -0
- metadata +90 -0
data/.document
ADDED
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Jostein B.E.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.textile
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
h1. Histograffle
|
2
|
+
|
3
|
+
h2. A sort of histogram object implemented in pure Ruby. Use it to count the occurrence of each of several things.
|
4
|
+
|
5
|
+
h3. Simple example of its use and abuse
|
6
|
+
|
7
|
+
I am not a mathematician, so this is not a mathematical library or explanation.
|
8
|
+
|
9
|
+
Sample use case: We have two lists of words, and want to count the occurrence of each word in the lists, first separately, and then together.
|
10
|
+
|
11
|
+
<pre>
|
12
|
+
<code>
|
13
|
+
require 'rubygems'
|
14
|
+
require 'histograffle'
|
15
|
+
|
16
|
+
include Histograffle
|
17
|
+
|
18
|
+
reported_incidents = %w[rain thunder confusion fun rain rain theft]
|
19
|
+
|
20
|
+
hist_one = Histogram.new
|
21
|
+
|
22
|
+
hist_one.eat(reported_incidents)
|
23
|
+
|
24
|
+
hist_one.ladder.each do |count|
|
25
|
+
puts "#{count}: #{hist_one[count].join(', ')}"
|
26
|
+
end
|
27
|
+
|
28
|
+
hist_two = Histogram.new
|
29
|
+
|
30
|
+
unreported_incidents = %w[monkey pig concert ufo airplane pig pig concert]
|
31
|
+
|
32
|
+
hist_two.eat(unreported_incidents)
|
33
|
+
|
34
|
+
hist_one << hist_two
|
35
|
+
|
36
|
+
</code>
|
37
|
+
</pre>
|
38
|
+
|
39
|
+
h3. API quickref
|
40
|
+
|
41
|
+
|_@Histograffle::Histogram.new([serialized_data])@_|Create new instance.|
|
42
|
+
|_@hist.eat(item_or_array_or_histogram)@_|Add the input. You can add histograms together this way.|
|
43
|
+
|_@hist[n]@_|Return the occurrences of which there are _n_.|
|
44
|
+
|_@hist.distribution[n]@_|Return the _n_-th highest occurrence count (zero-based index into the descending list).|
|
45
|
+
|_@hist.distribution@_|Descending list of occurrence counts.|
|
46
|
+
|_@hist.ladder@_|Ascending list of occurrence counts.|
|
47
|
+
|_@hist.top(n)@_|Return the top n entries as a hash mapping each count to the list of entries with that count, like @{12 => ['a_word'], 2 => ['some other word']}@.|
|
48
|
+
|_@hist.flat_top(n)@_|Same as above, but return only a flat list of the entries, without grouping them by count.|
|
49
|
+
|_@hist << other_hist@_|Add a histogram to another.|
|
50
|
+
|_@hist == other_hist@_|True when they contain counts of the same entries.|
|
51
|
+
|
52
|
+
There is also @.to_mongo@ and @.from_mongo@ serialization support.
|
53
|
+
|
54
|
+
To get copies of the raw data tables (see below), you use @hist.data@ and @hist.lookup@.
|
55
|
+
|
56
|
+
h3. Implementation
|
57
|
+
|
58
|
+
Each instance stores its data internally, in two hashes, quite simply. The _data_ hash keys occurrences to counts. The _lookup_ hash keys counts to occurrences. Occurrences can be any object.
|
59
|
+
|
60
|
+
!http://davidpsydoc.com/brian/pictures/extremely%20random,%20hamburger%20gets%20arrested.jpg!
|
61
|
+
"Credit":http://davidpsydoc.com/brian/pictures/extremely%20random
|
62
|
+
|
63
|
+
h3. LICENSE
|
64
|
+
|
65
|
+
(MIT License)
|
66
|
+
|
67
|
+
Copyright (c) 2009 Jostein Berre Eliassen
|
68
|
+
|
69
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
70
|
+
a copy of this software and associated documentation files (the
|
71
|
+
'Software'), to deal in the Software without restriction, including
|
72
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
73
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
74
|
+
permit persons to whom the Software is furnished to do so, subject to
|
75
|
+
the following conditions:
|
76
|
+
|
77
|
+
The above copyright notice and this permission notice shall be
|
78
|
+
included in all copies or substantial portions of the Software.
|
79
|
+
|
80
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
81
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
82
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
83
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
84
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
85
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
86
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
require 'rake'

# Gem packaging/release tasks. Jeweler is optional: when it cannot be loaded
# a hint is printed and the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "histograffle"
    gem.summary = %Q{Count the occurrence of each of several things.}
    gem.description = %Q{Count the occurrence of each of several things. Pure and banal Ruby.}
    gem.email = "find.me@on.github"
    gem.homepage = "http://github.com/jbe/histograffle"
    gem.authors = ["Jostein B.E."]
    gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, a stub :rcov task explains how to get it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this file (presumably it is
# supplied by the Jeweler tasks above). Only depend on it when it exists, so
# that `rake test` still works when Jeweler could not be loaded.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# API documentation. Newer Rake versions no longer ship 'rake/rdoctask';
# prefer the task class provided by RDoc itself and fall back to the old one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # Strip the trailing newline of the VERSION file so it does not end up in the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "histograffle #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/histograffle.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
|
2
|
+
# Namespace for the histogram implementation.
module Histograffle; end

# Counts how often each item has been seen.
#
# Two hashes are kept in sync:
#   @data   maps item  => count (missing items read as 0)
#   @lookup maps count => array of the items that currently have that count
class Histograffle::Histogram

  # Returns a copy of the item => count table.
  def data
    @data.dup
  end

  # Returns a copy of the count => items table. The bucket arrays are copied
  # as well, so the caller cannot modify the internal index.
  def lookup
    deep_copy_lookup(@lookup)
  end

  # d - optional serialized form: [item => count hash, count => items hash].
  #
  # Both tables (including the bucket arrays) are copied, so later calls to
  # #eat never modify the structures that were passed in.
  def initialize(d=[{}, {}])
    @data = d[0].dup
    @lookup = deep_copy_lookup(d[1])
    @data.default = 0
  end

  # Adds input to the histogram and returns self.
  #
  # o - another Histogram (merged via #<<, n is ignored), any object that
  #     responds to #each (every yielded element is counted), or a single item.
  # n - amount added to the count of each item (default 1).
  def eat( o, n=1 )
    return self << o if o.is_a?(self.class)

    items = o.respond_to?(:each) ? o : [o]
    items.each { |item| bump(item, n) }
    self
  end

  # Returns a copy of the list of items that occur exactly n times, or nil
  # when no item has that count.
  def [](n)
    bucket = @lookup[n]
    bucket && bucket.dup
  end

  # Descending list of the occurrence counts that are in use.
  def distribution
    ladder.reverse
  end

  # Ascending list of the occurrence counts that are in use.
  def ladder
    @lookup.keys.sort
  end

  # Returns up to n items with the highest counts as a hash of
  # count => [items]. Returns an empty hash when n is less than 1.
  def top( n )
    result = {}
    return result if n < 1

    distribution.each do |count|
      @lookup[count].each do |word|
        (result[count] ||= []) << word
        return result if (n -= 1) < 1
      end
    end
    result
  end

  # Like #top, but returns a flat array of items ordered by descending count.
  # Returns an empty array when n is less than 1.
  def flat_top( n )
    result = []
    return result if n < 1

    distribution.each do |count|
      @lookup[count].each do |word|
        result << word
        return result if (n -= 1) < 1
      end
    end
    result
  end

  # Adds every count of another histogram to this one and returns self.
  def <<(other)
    other.data.each { |item, count| eat(item, count) }
    self
  end

  # True when other is a histogram holding the same counts for the same items.
  #
  # Only the item => count table is compared: the order of the items inside
  # a @lookup bucket depends on the order in which they were eaten, so
  # comparing the buckets would make equal histograms look different.
  def ==(other)
    return false unless other.is_a?(self.class)
    @data == other.data
  end

  # Serializes a histogram as [data, lookup], with the lookup keys converted
  # to strings. nil is passed through; arrays are delegated to Array.to_mongo,
  # which is not defined in this file (NOTE(review): presumably provided by
  # the Mongo mapping library in use - confirm).
  def self.to_mongo(v)
    return v if v.nil?
    return Array.to_mongo(v) if v.is_a?(Array)
    [v.data, stringify_keys(v.lookup)]
  end

  # Rebuilds a histogram from the array produced by .to_mongo. nil and
  # existing histograms are returned unchanged; false yields nil.
  def self.from_mongo(v)
    return v if v.nil? || v.is_a?(self)
    return nil unless v
    new([v[0], intify_keys(v[1])])
  end

  # Returns a new hash with every key converted via #to_s.
  # Note: a bare `private` does not apply to `def self.` methods, so this
  # helper has always been publicly callable; that is left unchanged.
  def self.stringify_keys(hsh)
    converted = {}
    hsh.each { |key, value| converted[key.to_s] = value }
    converted
  end

  # Returns a new hash with every key converted via #to_i.
  def self.intify_keys(hsh)
    converted = {}
    hsh.each { |key, value| converted[key.to_i] = value }
    converted
  end

  private

  # Adds n to the count of item and moves it from its old bucket to the new one.
  def bump(item, n)
    # Adding nothing must not touch the index: the old and the new bucket
    # would be the same array, and the item would be removed from it.
    return if n == 0

    old_count = @data[item]
    new_count = old_count + n
    @data[item] = new_count

    old_bucket = @lookup[old_count]
    if old_bucket
      old_bucket.delete(item)
      # Drop empty buckets so #ladder only lists counts that are in use.
      @lookup.delete(old_count) if old_bucket.empty?
    end

    (@lookup[new_count] ||= []) << item
  end

  # Copies a count => items table together with its bucket arrays.
  def deep_copy_lookup(table)
    copy = table.dup
    copy.each_key { |count| copy[count] = copy[count].dup }
    copy
  end
end
|
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: histograffle
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
- 0
|
10
|
+
version: 0.1.0
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Jostein B.E.
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-06-20 00:00:00 +02:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: thoughtbot-shoulda
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Count the occurrence of each of several things. Pure and banal Ruby.
|
36
|
+
email: find.me@on.github
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.textile
|
44
|
+
files:
|
45
|
+
- .document
|
46
|
+
- .gitignore
|
47
|
+
- LICENSE
|
48
|
+
- README.textile
|
49
|
+
- Rakefile
|
50
|
+
- VERSION
|
51
|
+
- lib/histograffle.rb
|
52
|
+
- test/helper.rb
|
53
|
+
- test/test_histograffle.rb
|
54
|
+
has_rdoc: true
|
55
|
+
homepage: http://github.com/jbe/histograffle
|
56
|
+
licenses: []
|
57
|
+
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options:
|
60
|
+
- --charset=UTF-8
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
hash: 3
|
69
|
+
segments:
|
70
|
+
- 0
|
71
|
+
version: "0"
|
72
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
hash: 3
|
78
|
+
segments:
|
79
|
+
- 0
|
80
|
+
version: "0"
|
81
|
+
requirements: []
|
82
|
+
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 1.3.7
|
85
|
+
signing_key:
|
86
|
+
specification_version: 3
|
87
|
+
summary: Count the occurrence of each of several things.
|
88
|
+
test_files:
|
89
|
+
- test/helper.rb
|
90
|
+
- test/test_histograffle.rb
|