board_game_geek 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .rvmrc
2
+ spec/vcr_cassettes/*
data/Gemfile ADDED
@@ -0,0 +1,2 @@
# Fetch gems over HTTPS. The `:rubygems` symbol shorthand is deprecated by
# Bundler because it resolves to a plain-HTTP source.
source "https://rubygems.org"

# Dependencies are declared once, in the gemspec next to this file.
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ board_game_geek (0.0.1)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ addressable (2.2.6)
11
+ crack (0.3.1)
12
+ minitest (2.8.0)
13
+ nokogiri (1.5.0)
14
+ rake (0.9.2.2)
15
+ vcr (1.11.3)
16
+ webmock (1.7.7)
17
+ addressable (~> 2.2, > 2.2.5)
18
+ crack (>= 0.1.7)
19
+
20
+ PLATFORMS
21
+ ruby
22
+
23
+ DEPENDENCIES
24
+ board_game_geek!
25
+ minitest
26
+ rake
27
+ vcr
28
+ webmock
data/Rakefile ADDED
@@ -0,0 +1,14 @@
# Gem packaging tasks supplied by Bundler's gem helper.
require 'bundler'
Bundler::GemHelper.install_tasks

# Spec runner: `rake spec` runs every spec/*_spec.rb file with spec/ on the
# load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |spec_task|
  spec_task.verbose = true
  spec_task.pattern = "spec/*_spec.rb"
  spec_task.libs.push("spec")
end

# Running plain `rake` runs the specs.
task :default => [:spec]
@@ -0,0 +1,27 @@
# -*- encoding: utf-8 -*-
# Make lib/ loadable so the version constant can be read without installing the gem.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "board_game_geek/version"

Gem::Specification.new do |s|
  s.name        = "board_game_geek"
  s.version     = BoardGameGeek::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Loren Norman"]
  s.email       = ["lorennorman@gmail.com"]
  s.homepage    = "http://github.com/lorennorman/board_game_geek"
  s.summary     = %q{BoardGameGeek.com scraper}
  s.description = %q{We love board games. We love coding. Let's code against the data from BoardGameGeek.com!}

  s.rubyforge_project = "board_game_geek"

  s.add_dependency "nokogiri"
  s.add_development_dependency "minitest"
  s.add_development_dependency "vcr"
  s.add_development_dependency "webmock"
  s.add_development_dependency "rake"

  # Package the files tracked by git, minus anything under pkg/: that is where
  # built .gem archives are written, and a tracked archive would otherwise be
  # shipped inside every later build of the gem.
  tracked_files = `git ls-files`.split("\n")
  s.files         = tracked_files.reject { |path| path.start_with?("pkg/") }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
@@ -0,0 +1,3 @@
module BoardGameGeek
  # Version of the gem, read by the gemspec. Frozen so callers cannot mutate
  # the shared string in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,56 @@
1
+ require "bundler/setup"
2
+ require "nokogiri"
3
+ require "open-uri"
4
+ require "ostruct"
5
+
module BoardGameGeek
  # Scrapes the ranked board game listing of BoardGameGeek.com.
  class Scraper
    # Prefix of a listing page URL; the page number and ".html" are appended.
    RANKED_GAMES_URI = 'http://boardgamegeek.com/browse/boardgame/page/'
    # Number of games this scraper reads from one full listing page.
    GAMES_PER_PAGE = 100

    # Fetches the first +how_many+ games of the ranked listing, in page order.
    #
    # One HTTP request is made per listing page needed. A total of zero or
    # less makes no request and returns an empty array.
    #
    # @param how_many [Integer] number of games wanted
    # @return [Array<OpenStruct>] records responding to +name+, +ranking+,
    #   +rating+, +release_date+ and +image_url+
    def self.top_games(how_many)
      page_limits(how_many).each_with_index.flat_map do |limit, index|
        games_on_page(index + 1, limit)
      end
    end

    # Splits a total into the number of pages to fetch and the number of games
    # left over on the last page.
    #
    # The remainder is 0 when the total is an exact multiple of GAMES_PER_PAGE,
    # meaning the last page is read in full.
    #
    # @param total [Integer] number of games wanted
    # @return [Array(Integer, Integer)] +[pages, remainder]+
    def self.break_down_total(total)
      pages, remainder = total.divmod(GAMES_PER_PAGE)
      # A partial page still costs one more request.
      pages += 1 unless remainder.zero?
      [pages, remainder]
    end

    # Row limits for each page to fetch: GAMES_PER_PAGE for every page, except
    # that a non-zero remainder caps the last one.
    def self.page_limits(how_many)
      full_pages, remainder = break_down_total(how_many)
      return [] if full_pages < 1

      limits = Array.new(full_pages, GAMES_PER_PAGE)
      limits[-1] = remainder if remainder > 0
      limits
    end

    # Parses up to +limit+ games from one listing page.
    def self.games_on_page(page_number, limit)
      rows = fetch_page(page_number).css('table#collectionitems tr')
      # The first table row is skipped, not parsed as a game (presumably the header row).
      rows.drop(1).first(limit).map { |game_row| parse_game(game_row) }
    end

    # Downloads and parses one listing page.
    #
    # URI#open (from open-uri) is used instead of Kernel#open, which no longer
    # accepts URLs on Ruby 3; the block form closes the handle after parsing.
    def self.fetch_page(page_number)
      URI.parse("#{RANKED_GAMES_URI}#{page_number}.html").open do |io|
        Nokogiri::HTML(io)
      end
    end

    # Builds a game record from one table row.
    def self.parse_game(game_row)
      cells = game_row.css("td").map(&:inner_text).map(&:strip)
      # The third cell is split on newlines: the first piece is used as the name
      # and the third as the release date, whose first and last characters are
      # dropped (presumably surrounding parentheses).
      name_and_date = cells[2].to_s.split("\n")

      OpenStruct.new(name:         name_and_date[0],
                     ranking:      cells[0],
                     rating:       cells[3],
                     release_date: name_and_date[2].to_s.strip[1..-2],
                     image_url:    thumbnail_url(game_row))
    end

    # Thumbnail URL of a row with "_mt" swapped for "_t", or nil when the row
    # has no thumbnail image or the image has no src attribute.
    def self.thumbnail_url(game_row)
      image = game_row.css(".collection_thumbnail img").first
      source = image && image["src"]
      source && source.sub("_mt", "_t")
    end

    private_class_method :page_limits, :games_on_page, :fetch_page,
                         :parse_game, :thumbnail_url
  end
end

# Short alias for the namespace.
BGG = BoardGameGeek
Binary file
@@ -0,0 +1,62 @@
require "spec_helper"

describe BGG::Scraper do
  subject { BGG::Scraper }

  describe "breaking a total into pages and remainder" do
    # total requested => [pages to fetch, games left over on the last page]
    data = {    1 => [  1,  1 ],
               50 => [  1, 50 ],
              100 => [  1,  0 ],
              101 => [  2,  1 ],
              199 => [  2, 99 ],
              200 => [  2,  0 ],
              201 => [  3,  1 ],
              999 => [ 10, 99 ],
             1000 => [ 10,  0 ],
             1001 => [ 11,  1 ] }

    data.each do |total, (pages, remainder)|
      it "breaks a total of #{total} into #{pages} pages and #{remainder} remainder" do
        final_pages, final_remainder = subject.break_down_total(total)
        final_pages.must_equal pages
        final_remainder.must_equal remainder
      end
    end
  end

  describe "a game record" do
    # The cassette is inserted and ejected inside this group only, so examples
    # that never touch the network do not eject a cassette they never inserted.
    before do
      VCR.insert_cassette('board_game_geek_1')
    end

    after do
      VCR.eject_cassette
    end

    let(:first_game) { subject.top_games(1).first }

    it "exposes some handy attributes" do
      first_game.must_respond_to :name
      first_game.must_respond_to :image_url
      first_game.must_respond_to :rating
      first_game.must_respond_to :ranking
      first_game.must_respond_to :release_date
    end

    it "correctly parses the image source" do
      first_game.image_url.must_equal "http://cf.geekdo-images.com/images/pic361592_t.jpg"
    end
  end

  it "gets the top 100 games" do
    # The block form ejects the cassette when the block exits.
    VCR.use_cassette('board_game_geek_100') do
      subject.top_games(100).length.must_equal 100
    end
  end

  it "gets the top 1000 games" do
    VCR.use_cassette('board_game_geek_1000') do
      subject.top_games(1000).length.must_equal 1000
    end
  end
end
@@ -0,0 +1,20 @@
require "bundler/setup"

# Test framework (MiniTest); autorun executes the loaded specs at exit.
require "minitest/autorun"
# Alternative reporter setup, currently disabled:
# require 'minitest/reporters'
# MiniTest::Unit.runner = MiniTest::SuiteRunner.new
# MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new

# HTTP recording (VCR): cassettes are kept under spec/vcr_cassettes and
# requests are stubbed through WebMock.
require "vcr"

VCR.config do |vcr|
  vcr.stub_with :webmock
  vcr.cassette_library_dir = 'spec/vcr_cassettes'
end

# The library being tested.
require "board_game_geek"
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: board_game_geek
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Loren Norman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &2153892060 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153892060
25
+ - !ruby/object:Gem::Dependency
26
+ name: minitest
27
+ requirement: &2153891500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2153891500
36
+ - !ruby/object:Gem::Dependency
37
+ name: vcr
38
+ requirement: &2153891040 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2153891040
47
+ - !ruby/object:Gem::Dependency
48
+ name: webmock
49
+ requirement: &2153890620 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2153890620
58
+ - !ruby/object:Gem::Dependency
59
+ name: rake
60
+ requirement: &2153890200 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2153890200
69
+ description: We love board games. We love coding. Let's code against the data from
70
+ BoardGameGeek.com!
71
+ email:
72
+ - lorennorman@gmail.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - .gitignore
78
+ - Gemfile
79
+ - Gemfile.lock
80
+ - Rakefile
81
+ - board_game_geek.gemspec
82
+ - lib/board_game_geek.rb
83
+ - lib/board_game_geek/version.rb
84
+ - pkg/board_game_geek-0.0.1.gem
85
+ - spec/scraper_spec.rb
86
+ - spec/spec_helper.rb
87
+ homepage: http://github.com/lorennorman/board_game_geek
88
+ licenses: []
89
+ post_install_message:
90
+ rdoc_options: []
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ segments:
100
+ - 0
101
+ hash: 2103976381804118628
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ segments:
109
+ - 0
110
+ hash: 2103976381804118628
111
+ requirements: []
112
+ rubyforge_project: board_game_geek
113
+ rubygems_version: 1.8.10
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: BoardGameGeek.com scraper
117
+ test_files:
118
+ - spec/scraper_spec.rb
119
+ - spec/spec_helper.rb