scotchit 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +1 -0
- data/Gemfile +4 -0
- data/LICENSE +339 -0
- data/README.md +57 -0
- data/Rakefile +9 -0
- data/bin/scotchit +5 -0
- data/bin/scotchit.rb +92 -0
- data/lib/ratings.csv +10759 -0
- data/lib/scotchit.rb +108 -0
- data/lib/scotchit/version.rb +3 -0
- data/scotchit.gemspec +24 -0
- data/test/run_test.rb +21 -0
- metadata +102 -0
data/lib/scotchit.rb
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
# ruby scotchit.rb
|
2
|
+
#
|
3
|
+
# This program is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU General Public License as published by
|
5
|
+
# the Free Software Foundation, either version 2 of the License, or
|
6
|
+
# (at your option) any later version.
|
7
|
+
#
|
8
|
+
# This program is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the GNU General Public License
|
14
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
15
|
+
#
|
16
|
+
require 'scotchit/version'
|
17
|
+
require 'csv'
|
18
|
+
require 'statistics2'
|
19
|
+
|
20
|
+
module Scotchit
  #
  # Module-wide accumulators. parse_csv appends to DB and Price, and score
  # fills Stats. Nothing ever resets them, so parsing the same file twice
  # doubles every review count.
  #
  # whisky name => Array of Integer ratings
  DB = {}
  # whisky name => [score percent, positive votes, total votes, price indicator]
  Stats = {}
  # whisky name => Array of Float prices (only those above MIN_PRICE)
  Price = {}

  # Default ratings file, resolved relative to this source file so that it is
  # found no matter which directory the program is started from.
  RATINGS_FILE = File.expand_path('../ratings.csv', __FILE__)
  # Values of the "Region" column that mark a row as not being Scotch.
  NON_SCOTCH_REGIONS = %w[Bourbon Rye Grain Tennessee Liqueur Wheat].freeze
  # Substrings that mark a price as not being in US dollars.
  FOREIGN_CURRENCY_MARKERS = %w[£ CDN CAD AUD € GBP NZD EUR CAN].freeze
  # Prices at or below this value are not recorded.
  MIN_PRICE = 20.0

  #
  # In keeping with the philosophy of reddit, this program evaluates the ratings
  # that redditors have given to all of the whiskies submitted for review by
  # computing the lower bound of a Wilson score confidence interval. This is the
  # same algorithm that reddit uses internally for ratings, and thus what you
  # see when you visit the site.
  #
  # Evan Miller's Wilson score interval function.
  #
  # pos        - number of positive votes
  # n          - total number of votes
  # confidence - two-sided confidence level, e.g. 0.95
  #
  # Returns 0 when there are no votes, otherwise the Float lower bound of the
  # interval for the true fraction of positive votes.
  def ci_lower_bound(pos, n, confidence)
    return 0 if n == 0

    z = Statistics2.pnormaldist(1-(1-confidence)/2)
    phat = 1.0*pos/n
    (phat + z*z/(2*n) - z * Math.sqrt((phat*(1-phat)+z*z/(4*n))/n))/(1+z*z/n)
  end
  module_function :ci_lower_bound

  #
  # Turn the free-form text of a "Price" cell into a Float.
  #
  # Returns 0.0 when the text mentions a foreign currency or contains no
  # number. Only the first number in the text is used, so a range such as
  # "$50-$60" reads as 50.0 instead of having all of its digits run together
  # into 5060.0. Commas are treated as thousands separators.
  def usd_price(text)
    shouted = text.upcase
    return 0.0 if FOREIGN_CURRENCY_MARKERS.any? { |marker| shouted.include?(marker) }

    text.delete(',')[/\d*\.?\d+/].to_f
  end
  module_function :usd_price
  private_class_method :usd_price

  #
  # Map an average price onto the "$" scale shown in the report. "?" means
  # that no usable price was recorded.
  def price_indicator(average)
    case average.round(0)
    when 0 then '?'
    when 1..39 then '$'
    when 40..69 then '$$'
    when 70..89 then '$$$'
    when 90..120 then '$$$$'
    else '$$$$$'
    end
  end
  module_function :price_indicator
  private_class_method :price_indicator

  #
  # grok the ratings.csv file
  #
  # file_name - path of a CSV file with a header row containing the columns
  #             "Whisky Name", "Region", "Rating" and "Price"
  #
  # Appends every Scotch rating to DB and every usable price to Price. Rows
  # without a whisky name are skipped.
  def parse_csv(file_name)
    CSV.foreach(file_name, headers: true) do |row|
      # ignore non-Scotch things
      next if NON_SCOTCH_REGIONS.include?(row['Region'])

      # clean up key name; a blank name cannot be used as a key
      name = row['Whisky Name'].to_s.strip
      next if name.empty?

      # seed key:val
      DB[name] ||= []
      Price[name] ||= []

      # append score; to_i turns a missing or non-numeric rating into 0
      DB[name] << row['Rating'].to_i

      cost = usd_price(row['Price'].to_s)
      Price[name] << cost if cost > MIN_PRICE
    end
  end
  module_function :parse_csv

  #
  # do the confidence interval computations for all scotches meeting our thresholds.
  #
  # Fills Stats with one entry per whisky that has six or more reviews.
  def score
    DB.each do |name, ratings|
      # only eval whisky that has 6 or more reviews on reddit
      next unless ratings.count > 5

      # a rating of 91 or better counts as a positive vote
      pos = ratings.count { |rating| rating > 90 }

      # calculate the Price indicator
      prices = Price.fetch(name, [])
      average = prices.empty? ? 0.0 : prices.reduce(:+) / prices.count

      percent = (ci_lower_bound(pos, ratings.count, 0.975) * 100.0).round(0)
      Stats[name] = [percent, pos, ratings.count, price_indicator(average)]
    end
  end
  module_function :score

  #
  # reverse sort by confidence score and print to term
  #
  # we are only displaying whole number percentages in order to avoid the perception that this is accurate to
  # some decimal of score. we're trying to select scotches that have good confidence of being yummy and deserving
  # of the price via the sample of redditors.
  #
  # file_name - ratings CSV to read; defaults to the copy shipped next to this
  #             file
  #
  # Only whiskies scoring above 20% are printed.
  def run(file_name = RATINGS_FILE)
    parse_csv(file_name)
    score
    Stats.sort_by { |_name, stat| -stat[0] }.each do |name, stat|
      puts "#{name}: #{stat[0]}% (#{stat[1]}+, #{stat[2]}#) #{stat[3]}" if stat[0] > 20.0
    end
  end
  module_function :run
end
|
data/scotchit.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'scotchit/version'
|
5
|
+
|
6
|
+
# Gem specification for scotchit.
Gem::Specification.new do |spec|
  spec.name = "scotchit"
  spec.version = Scotchit::VERSION
  spec.authors = ["Robert Hardin"]
  spec.email = ["rob@roberthardin.net"]
  spec.summary = %q{A scotch recommendation engine based on reddit.com/r/scotch's review archive.}
  spec.description = %q{Runs a lower bound confidence score on scotchit's whisky review archive to generate buy recommendations.}
  spec.homepage = "https://github.com/rhardin/scotchit"
  spec.license = "GPL"

  # Package exactly the files tracked by git; anything under bin/ becomes an
  # executable and anything under test/, spec/ or features/ a test file.
  spec.files = `git ls-files -z`.split("\x0")
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/scotchit.rb requires statistics2 when it is loaded, so it has to be
  # installed together with the gem rather than only in development.
  spec.add_dependency "statistics2", "~> 0.54"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
|
data/test/run_test.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "minitest/autorun"
|
2
|
+
|
3
|
+
require "scotchit"
|
4
|
+
|
5
|
+
# Smoke tests for the Scotchit module.
#
# Minitest::Test is the base class in minitest 5 and later. The older
# Minitest::Unit::TestCase name is only used when Minitest::Test is not
# defined.
class StatsTest < (defined?(Minitest::Test) ? Minitest::Test : Minitest::Unit::TestCase)
  # Ratings file, resolved relative to this test file so the suite does not
  # depend on the directory it is started from.
  RATINGS_FILE = File.expand_path('../../lib/ratings.csv', __FILE__)

  # Ten positive votes out of ten at the 0.975 confidence level.
  def test_ci
    # compare with a tolerance: the value is the result of floating point math
    assert_in_delta 0.6656067501944406, Scotchit.ci_lower_bound(10, 10, 0.975), 1e-12
  end

  # Parsing must register a whisky known to be in the ratings file.
  def test_parse
    Scotchit.parse_csv(RATINGS_FILE)
    assert Scotchit::DB.has_key?("Ardbeg Uigeadail")
  end

  # Scoring must produce a Stats entry, and parsing a Price entry, for it.
  def test_score
    Scotchit.parse_csv(RATINGS_FILE)
    Scotchit.score()
    assert Scotchit::Stats.has_key?("Ardbeg Uigeadail")
    assert Scotchit::Price.has_key?("Ardbeg Uigeadail")
  end
end
|
metadata
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: scotchit
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Robert Hardin
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-17 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: statistics2
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0.54'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0.54'
|
55
|
+
description: Runs a lower bound confidence score on scotchit's whisky review archive
|
56
|
+
to generate buy recommendations.
|
57
|
+
email:
|
58
|
+
- rob@roberthardin.net
|
59
|
+
executables:
|
60
|
+
- scotchit
|
61
|
+
- scotchit.rb
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files: []
|
64
|
+
files:
|
65
|
+
- ".gitignore"
|
66
|
+
- Gemfile
|
67
|
+
- LICENSE
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- bin/scotchit
|
71
|
+
- bin/scotchit.rb
|
72
|
+
- lib/ratings.csv
|
73
|
+
- lib/scotchit.rb
|
74
|
+
- lib/scotchit/version.rb
|
75
|
+
- scotchit.gemspec
|
76
|
+
- test/run_test.rb
|
77
|
+
homepage: https://github.com/rhardin/scotchit
|
78
|
+
licenses:
|
79
|
+
- GPL
|
80
|
+
metadata: {}
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options: []
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
requirements: []
|
96
|
+
rubyforge_project:
|
97
|
+
rubygems_version: 2.2.2
|
98
|
+
signing_key:
|
99
|
+
specification_version: 4
|
100
|
+
summary: A scotch recommendation engine based on reddit.com/r/scotch's review archive.
|
101
|
+
test_files:
|
102
|
+
- test/run_test.rb
|