scotchit 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/scotchit.rb ADDED
@@ -0,0 +1,108 @@
1
+ # ruby scotchit.rb
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 2 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+ #
16
+ require 'scotchit/version'
17
+ require 'csv'
18
+ require 'statistics2'
19
+
20
module Scotchit
  #
  # Module-level state shared by parse_csv, score and run. These hashes are
  # never cleared here, so parsing a file twice appends its rows twice.
  #
  # whisky name => list of integer ratings
  DB = {}
  # whisky name => [lower-bound score in percent, positive votes, total votes, price indicator]
  Stats = {}
  # whisky name => list of prices (only values above 20.0 are recorded)
  Price = {}

  # Ratings file that ships next to this source file. Resolved from this
  # file's location so `run` does not depend on the current working directory.
  DEFAULT_RATINGS_FILE = File.expand_path('../ratings.csv', __FILE__)

  # Region values that mark a row as something other than Scotch.
  NON_SCOTCH_REGIONS = %w[Bourbon Rye Grain Tennessee Liqueur Wheat].freeze

  # Substrings that mark a price as not being quoted in US dollars.
  FOREIGN_CURRENCY_MARKERS = %w[£ CDN CAD AUD € GBP NZD EUR CAN].freeze

  module_function

  #
  # In keeping with the philosophy of reddit, this program evaluates the ratings that
  # redditors have given to all of the whiskies submitted for review by computing a
  # Wilson Score confidence interval (lower bound). This is the same algorithm that
  # reddit uses internally for ratings and thus what you see when you visit the site.
  #
  # Evan Miller's Wilson Score Interval fn.
  #
  # pos        - number of positive votes
  # n          - total number of votes
  # confidence - confidence level, fed to Statistics2.pnormaldist as
  #              1-(1-confidence)/2 to obtain the z value
  #
  # Returns 0 when there are no votes, otherwise the lower bound as a Float.
  def ci_lower_bound(pos, n, confidence)
    return 0 if n == 0

    z = Statistics2.pnormaldist(1-(1-confidence)/2)
    phat = 1.0*pos/n
    # Same operations in the same order as the published formula, only split
    # into named parts, so the floating point result is unchanged.
    numerator = phat + z*z/(2*n) - z * Math.sqrt((phat*(1-phat)+z*z/(4*n))/n)
    denominator = 1+z*z/n
    numerator/denominator
  end

  #
  # Grok the ratings CSV file and append its rows to DB and Price.
  #
  # file_name - path to a CSV file with a header row; the 'Region',
  #             'Whisky Name', 'Rating' and 'Price' columns are read.
  #
  # Rows whose Region is in NON_SCOTCH_REGIONS are ignored, and so are rows
  # without a whisky name (previously such a row raised NoMethodError).
  def parse_csv(file_name)
    CSV.foreach(file_name, headers: true) do |row|
      # ignore non-Scotch things
      next if NON_SCOTCH_REGIONS.include?(row['Region'])

      # clean up key name; a nameless row cannot be attributed to any whisky
      name = row['Whisky Name'].to_s.strip
      next if name.empty?

      # get the rating as an integer (a missing rating becomes 0 via nil.to_i)
      rating = row['Rating'].to_i

      # get the cost as a floating point by removing everything but digits and
      # dots; prices carrying a foreign currency marker are recorded as 0.0
      price_text = row['Price'].to_s
      upcased_price = price_text.upcase
      cost = if FOREIGN_CURRENCY_MARKERS.none? { |marker| upcased_price.include?(marker) }
               price_text.gsub(/[^\d\.]/, '').to_f
             else
               0.0
             end

      # seed key:val
      unless DB.key?(name)
        DB[name] = []
        Price[name] = []
      end

      # append score, and the price when it clears the 20.0 floor
      DB[name] << rating
      Price[name] << cost if cost > 20.0
    end
  end

  #
  # Map an average price onto the "$" scale shown in the report.
  # "?" means no usable price was recorded for the whisky.
  def price_indicator(average)
    case average.round(0)
    when 0 then "?"
    when 1..39 then "$"
    when 40..69 then "$$"
    when 70..89 then "$$$"
    when 90..120 then "$$$$"
    else
      "$$$$$"
    end
  end
  private_class_method :price_indicator

  #
  # Do the confidence interval computations for all scotches meeting our thresholds
  # and store the result in Stats.
  def score
    DB.keys.each do |name|
      ratings = DB[name]
      # only eval whisky that has 6 or more reviews on reddit
      next unless ratings.count > 5

      # threshold as positive vote: 91+% rating
      positives = ratings.count { |rating| rating > 90 }

      # calculate the Price indicator from the mean recorded price
      prices = Price[name]
      average = prices.empty? ? 0.0 : prices.reduce(:+) / prices.count

      lower_bound = (ci_lower_bound(positives, ratings.count, 0.975) * 100.0).round(0)
      Stats[name] = [lower_bound, positives, ratings.count, price_indicator(average)]
    end
  end

  #
  # Parse the ratings, score them, then print every whisky whose score is above 20,
  # reverse sorted by confidence score.
  #
  # We are only displaying whole number percentages in order to avoid the perception that this is accurate to
  # some decimal of score. We're trying to select scotches that have good confidence of being yummy and deserving
  # of the price via the sample of redditors.
  #
  # file_name - ratings CSV to load; defaults to the ratings.csv next to this file.
  def run(file_name = DEFAULT_RATINGS_FILE)
    parse_csv(file_name)
    score
    Stats.sort_by { |_name, stat| -stat[0] }.each do |name, stat|
      puts "#{name}: #{stat[0]}% (#{stat[1]}+, #{stat[2]}#) #{stat[3]}" if stat[0] > 20.0
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace of the scotchit gem.
module Scotchit
  # Release number of the gem; scotchit.gemspec reads it as the spec version.
  VERSION = '1.0.0'
end
data/scotchit.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scotchit/version'
5
+
6
# Gem packaging description for scotchit.
Gem::Specification.new do |spec|
  spec.name          = "scotchit"
  spec.version       = Scotchit::VERSION
  spec.authors       = ["Robert Hardin"]
  spec.email         = ["rob@roberthardin.net"]
  spec.summary       = %q{A scotch recommendation engine based on reddit.com/r/scotch's review archive.}
  spec.description   = %q{Runs a lower bound confidence score on scotchit's whisky review archive to generate buy recommendations.}
  spec.homepage      = "https://github.com/rhardin/scotchit"
  spec.license       = "GPL"

  # Package every file tracked by git; executables and tests are picked out by path.
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/scotchit.rb requires statistics2 when it is loaded, so it has to be
  # installed together with the gem rather than only for development.
  spec.add_runtime_dependency "statistics2", "~> 0.54"

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
end
data/test/run_test.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "minitest/autorun"
2
+
3
+ require "scotchit"
4
+
5
# Smoke tests for the Scotchit module functions.
#
# Minitest::Test is the current base class; Minitest::Unit::TestCase is only
# used as a fallback when Minitest::Test is not defined.
class StatsTest < (defined?(Minitest::Test) ? Minitest::Test : Minitest::Unit::TestCase)
  # Ratings file shipped in lib/, located relative to this test file so the
  # suite does not depend on the directory it is started from.
  RATINGS_FILE = File.expand_path('../../lib/ratings.csv', __FILE__)

  def test_ci
    # compare floats with a tolerance instead of exact equality
    assert_in_delta 0.6656067501944406, Scotchit.ci_lower_bound(10, 10, 0.975), 1e-9
  end

  def test_parse
    Scotchit.parse_csv(RATINGS_FILE)
    assert Scotchit::DB.has_key?("Ardbeg Uigeadail")
  end

  def test_score
    Scotchit.parse_csv(RATINGS_FILE)
    Scotchit.score()
    assert Scotchit::Stats.has_key?("Ardbeg Uigeadail")
    assert Scotchit::Price.has_key?("Ardbeg Uigeadail")
  end
end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scotchit
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Robert Hardin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: statistics2
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.54'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.54'
55
+ description: Runs a lower bound confidence score on scotchit's whisky review archive
56
+ to generate buy recommendations.
57
+ email:
58
+ - rob@roberthardin.net
59
+ executables:
60
+ - scotchit
61
+ - scotchit.rb
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".gitignore"
66
+ - Gemfile
67
+ - LICENSE
68
+ - README.md
69
+ - Rakefile
70
+ - bin/scotchit
71
+ - bin/scotchit.rb
72
+ - lib/ratings.csv
73
+ - lib/scotchit.rb
74
+ - lib/scotchit/version.rb
75
+ - scotchit.gemspec
76
+ - test/run_test.rb
77
+ homepage: https://github.com/rhardin/scotchit
78
+ licenses:
79
+ - GPL
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.2.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: A scotch recommendation engine based on reddit.com/r/scotch's review archive.
101
+ test_files:
102
+ - test/run_test.rb