scotchit 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/scotchit.rb ADDED
@@ -0,0 +1,108 @@
1
+ # ruby scotchit.rb
2
+ #
3
+ # This program is free software: you can redistribute it and/or modify
4
+ # it under the terms of the GNU General Public License as published by
5
+ # the Free Software Foundation, either version 2 of the License, or
6
+ # (at your option) any later version.
7
+ #
8
+ # This program is distributed in the hope that it will be useful,
9
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
+ # GNU General Public License for more details.
12
+ #
13
+ # You should have received a copy of the GNU General Public License
14
+ # along with this program. If not, see <http://www.gnu.org/licenses/>.
15
+ #
16
+ require 'scotchit/version'
17
+ require 'csv'
18
+ require 'statistics2'
19
+
20
+ module Scotchit
21
+ #
22
+ # seeding the hashes
23
+ DB = {}
24
+ Stats = {}
25
+ Price = {}
26
+ #
27
+ # In keeping with the philosophy of reddit, this program evaluates the ratings that
28
+ # redditors have given to all of the whiskys submitted to review by executing a
29
+ # Wilson Score confidence interval (lower bound). This is the same algorithm that
30
+ # reddit uses internally for ratings and thus what you see when you visit the site.
31
+ #
32
+ # Evan Miller's Wilson Score Interval fn
33
+ def ci_lower_bound(pos, n, confidence)
34
+ if n == 0
35
+ return 0
36
+ end
37
+ z = Statistics2.pnormaldist(1-(1-confidence)/2)
38
+ phat = 1.0*pos/n
39
+ (phat + z*z/(2*n) - z * Math.sqrt((phat*(1-phat)+z*z/(4*n))/n))/(1+z*z/n)
40
+ end
41
+ module_function :ci_lower_bound
42
+ #
43
+ # grok the ratings.csv file
44
+ def parse_csv(file_name)
45
+ CSV.foreach(file_name, headers:true) do |row|
46
+ # ignore non-Scotch things
47
+ if %w[Bourbon Rye Grain Tennessee Liqueur Wheat].include? row['Region']
48
+ next
49
+ end
50
+ # clean up key name
51
+ name = row["Whisky Name"].strip
52
+ # get the rating as an integer
53
+ val = row['Rating'].to_i
54
+ # get the cost as a floating point by removing non-digits and throwing out
55
+ # un-american funny money
56
+ if !%w[£ CDN CAD AUD € GBP NZD EUR CAN].any? {|x| row['Price'].to_s.upcase.include? x} #ugly
57
+ cost = row['Price'].to_s.gsub(/[^\d\.]/, '').to_f
58
+ else
59
+ cost = 0.0
60
+ end
61
+ # seed key:val
62
+ if !DB.has_key?(name)
63
+ DB[name] = []
64
+ Price[name] = []
65
+ end
66
+ # append score
67
+ DB[name] << ((val == nil) ? 0 : val)
68
+ if cost > 20.0
69
+ Price[name] << cost
70
+ end
71
+ end
72
+ end
73
+ module_function :parse_csv
74
+ #
75
+ # do the confidence interval computations for all scotches meeting our thresholds.
76
+ def score()
77
+ DB.keys.each do |k|
78
+ if DB[k].count > 5 # only eval whisky that has 6 or more reviews on reddit
79
+ pos = DB[k].count {|x| x > 90 } # threshold as positive vote 91+% rating
80
+ # calculate the Price indicator
81
+ average = (Price[k] != []) ? Price[k].reduce(:+) / Price[k].count : 0.0
82
+ indicator = case average.round(0)
83
+ when 0 then "?"
84
+ when 1..39 then "$"
85
+ when 40..69 then "$$"
86
+ when 70..89 then "$$$"
87
+ when 90..120 then "$$$$"
88
+ else
89
+ "$$$$$"
90
+ end
91
+ Stats[k] = [(ci_lower_bound(pos, DB[k].count, 0.975) * 100.0).round(0), pos, DB[k].count, indicator]
92
+ end
93
+ end
94
+ end
95
+ module_function :score
96
+ #
97
+ # reverse sort by confidence score and print to term
98
+ #
99
+ # we are only displaying whole number percentages in order to avoid the perception that this is accurate to
100
+ # some decimal of score. we're trying to select scotches that have good confidence of being yummy and deserving
101
+ # of the price via the sample of redditors.
102
+ def run()
103
+ parse_csv("lib/ratings.csv")
104
+ score()
105
+ Stats.sort_by {|k,i| -i[0]}.each {|k,i| (i[0] > 20.0 && (puts "#{k}: #{i[0]}% (#{i[1]}+, #{i[2]}#) #{i[3]}")) || nil}
106
+ end
107
+ module_function :run
108
+ end
@@ -0,0 +1,3 @@
1
+ module Scotchit
2
+ VERSION = "1.0.0"
3
+ end
data/scotchit.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'scotchit/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "scotchit"
8
+ spec.version = Scotchit::VERSION
9
+ spec.authors = ["Robert Hardin"]
10
+ spec.email = ["rob@roberthardin.net"]
11
+ spec.summary = %q{A scotch recommendation engine based on reddit.com/r/scotch's review archive.}
12
+ spec.description = %q{Runs a lower bound confidence score on scotchit's whisky review archive to generate buy recommendations.}
13
+ spec.homepage = "https://github.com/rhardin/scotchit"
14
+ spec.license = "GPL"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.7"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+ spec.add_development_dependency "statistics2", "~> 0.54"
24
+ end
data/test/run_test.rb ADDED
@@ -0,0 +1,21 @@
1
+ require "minitest/autorun"
2
+
3
+ require "scotchit"
4
+
5
+ class StatsTest < Minitest::Unit::TestCase #Minitest::Test
6
+ def test_ci
7
+ assert_equal 0.6656067501944406, Scotchit.ci_lower_bound(10, 10, 0.975)
8
+ end
9
+
10
+ def test_parse
11
+ Scotchit.parse_csv("lib/ratings.csv")
12
+ assert Scotchit::DB.has_key?("Ardbeg Uigeadail")
13
+ end
14
+
15
+ def test_score
16
+ Scotchit.parse_csv("lib/ratings.csv")
17
+ Scotchit.score()
18
+ assert Scotchit::Stats.has_key?("Ardbeg Uigeadail")
19
+ assert Scotchit::Price.has_key?("Ardbeg Uigeadail")
20
+ end
21
+ end
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scotchit
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Robert Hardin
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-01-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: statistics2
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.54'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.54'
55
+ description: Runs a lower bound confidence score on scotchit's whisky review archive
56
+ to generate buy recommendations.
57
+ email:
58
+ - rob@roberthardin.net
59
+ executables:
60
+ - scotchit
61
+ - scotchit.rb
62
+ extensions: []
63
+ extra_rdoc_files: []
64
+ files:
65
+ - ".gitignore"
66
+ - Gemfile
67
+ - LICENSE
68
+ - README.md
69
+ - Rakefile
70
+ - bin/scotchit
71
+ - bin/scotchit.rb
72
+ - lib/ratings.csv
73
+ - lib/scotchit.rb
74
+ - lib/scotchit/version.rb
75
+ - scotchit.gemspec
76
+ - test/run_test.rb
77
+ homepage: https://github.com/rhardin/scotchit
78
+ licenses:
79
+ - GPL
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.2.2
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: A scotch recommendation engine based on reddit.com/r/scotch's review archive.
101
+ test_files:
102
+ - test/run_test.rb