board_game_geek 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ .rvmrc
2
+ spec/vcr_cassettes/*
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source :rubygems
2
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,28 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ board_game_geek (0.0.1)
5
+ nokogiri
6
+
7
+ GEM
8
+ remote: http://rubygems.org/
9
+ specs:
10
+ addressable (2.2.6)
11
+ crack (0.3.1)
12
+ minitest (2.8.0)
13
+ nokogiri (1.5.0)
14
+ rake (0.9.2.2)
15
+ vcr (1.11.3)
16
+ webmock (1.7.7)
17
+ addressable (~> 2.2, > 2.2.5)
18
+ crack (>= 0.1.7)
19
+
20
+ PLATFORMS
21
+ ruby
22
+
23
+ DEPENDENCIES
24
+ board_game_geek!
25
+ minitest
26
+ rake
27
+ vcr
28
+ webmock
data/Rakefile ADDED
@@ -0,0 +1,14 @@
1
+ ### Gem Development/Management
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+
5
+
6
+ ### Spec Runner
7
+ require 'rake/testtask'
8
+ Rake::TestTask.new :spec do |t|
9
+ t.libs << "spec"
10
+ t.pattern = "spec/*_spec.rb"
11
+ t.verbose = true
12
+ end
13
+
14
+ task :default => :spec
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "board_game_geek/version"
4
+
5
+ Gem::Specification.new do |s|
6
+ s.name = "board_game_geek"
7
+ s.version = BoardGameGeek::VERSION
8
+ s.platform = Gem::Platform::RUBY
9
+ s.authors = ["Loren Norman"]
10
+ s.email = ["lorennorman@gmail.com"]
11
+ s.homepage = "http://github.com/lorennorman/board_game_geek"
12
+ s.summary = %q{BoardGameGeek.com scraper}
13
+ s.description = %q{We love board games. We love coding. Let's code against the data from BoardGameGeek.com!}
14
+
15
+ s.rubyforge_project = "board_game_geek"
16
+
17
+ s.add_dependency "nokogiri"
18
+ s.add_development_dependency "minitest"
19
+ s.add_development_dependency "vcr"
20
+ s.add_development_dependency "webmock"
21
+ s.add_development_dependency "rake"
22
+
23
+ s.files = `git ls-files`.split("\n")
24
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
25
+ # s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
26
+ s.require_paths = ["lib"]
27
+ end
@@ -0,0 +1,3 @@
1
+ module BoardGameGeek
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,56 @@
1
+ require "bundler/setup"
2
+ require "nokogiri"
3
+ require "open-uri"
4
+ require "ostruct"
5
+
6
+ module BoardGameGeek
7
+ class Scraper
8
+ RANKED_GAMES_URI = 'http://boardgamegeek.com/browse/boardgame/page/'
9
+ GAMES_PER_PAGE = 100
10
+
11
+ def self.top_games how_many
12
+ # break total into the number of pages to fetch + the size of a partial last page (0 if none)
13
+ full_pages, remainder = self.break_down_total(how_many)
14
+ # accumulate one call to bgg per page, #{full_pages} calls in total
15
+ # on the last page keep only #{remainder} games when the remainder is non-zero
16
+
17
+ Array.new.tap do |games|
18
+ (1..(full_pages)).each do |page_number|
19
+ how_many = GAMES_PER_PAGE
20
+ how_many = remainder if page_number == full_pages && remainder > 0
21
+
22
+ doc = Nokogiri::HTML(open "#{RANKED_GAMES_URI}#{page_number}.html")
23
+
24
+ doc.css('table#collectionitems tr').each_with_index do |game_row, idx|
25
+ next if idx == 0
26
+ break if idx > how_many
27
+
28
+ cells = game_row.css("td").map(&:inner_text).map(&:strip)
29
+
30
+ name_and_date = cells[2].to_s.split("\n")
31
+
32
+ ranking = cells[0]
33
+ name = name_and_date[0]
34
+ release_date = name_and_date[2].to_s.strip[1..-2]
35
+ rating = cells[3]
36
+
37
+ image_url = game_row.css(".collection_thumbnail img").first["src"]
38
+ image_url.sub!("_mt", "_t")
39
+
40
+ games << OpenStruct.new(name: name,
41
+ ranking: ranking,
42
+ rating: rating,
43
+ release_date: release_date,
44
+ image_url: image_url)
45
+ end
46
+ end
47
+ end
48
+ end
49
+
50
+ def self.break_down_total total
51
+ [(total.to_f/GAMES_PER_PAGE).ceil, total%GAMES_PER_PAGE]
52
+ end
53
+ end
54
+ end
55
+
56
+ BGG = BoardGameGeek
Binary file
@@ -0,0 +1,62 @@
1
+ require "spec_helper"
2
+
3
+ describe BGG::Scraper do
4
+ subject { BGG::Scraper }
5
+
6
+ after do
7
+ VCR.eject_cassette
8
+ end
9
+
10
+ describe "breaking a total into pages and remainder" do
11
+ data = { 1 => [ 1, 1 ],
12
+ 50 => [ 1, 50 ],
13
+ 100 => [ 1, 0 ],
14
+ 101 => [ 2, 1 ],
15
+ 199 => [ 2, 99 ],
16
+ 200 => [ 2, 0 ],
17
+ 201 => [ 3, 1 ],
18
+ 999 => [ 10, 99 ],
19
+ 1000 => [ 10, 0 ],
20
+ 1001 => [ 11, 1 ] }
21
+
22
+ data.each do |total, desired_results|
23
+ pages, remainder = desired_results
24
+
25
+ it "breaks a total of #{total} into #{pages} pages and #{remainder} remainder" do
26
+ final_pages, final_remainder = subject.break_down_total(total)
27
+ final_pages.must_be :==, pages
28
+ final_remainder.must_be :==, remainder
29
+ end
30
+ end
31
+ end
32
+
33
+ describe "a game record" do
34
+ before do
35
+ VCR.insert_cassette('board_game_geek_1')
36
+ end
37
+
38
+ let(:first_game) { subject.top_games(1).first }
39
+
40
+ it "exposes some handy attributes" do
41
+ first_game.must_respond_to :name
42
+ first_game.must_respond_to :image_url
43
+ first_game.must_respond_to :rating
44
+ first_game.must_respond_to :ranking
45
+ first_game.must_respond_to :release_date
46
+ end
47
+
48
+ it "correctly parses the image source" do
49
+ first_game.image_url.must_be :==, "http://cf.geekdo-images.com/images/pic361592_t.jpg"
50
+ end
51
+ end
52
+
53
+ it "gets the top 100 games" do
54
+ VCR.insert_cassette('board_game_geek_100')
55
+ subject.top_games(100).length.must_be :==, 100
56
+ end
57
+
58
+ it "gets the top 1000 games" do
59
+ VCR.insert_cassette('board_game_geek_1000')
60
+ subject.top_games(1000).length.must_be :==, 1000
61
+ end
62
+ end
@@ -0,0 +1,20 @@
1
+ require "bundler/setup"
2
+
3
+ # MiniTest
4
+ require "minitest/autorun"
5
+ # require 'minitest/reporters'
6
+ # MiniTest::Unit.runner = MiniTest::SuiteRunner.new
7
+ # MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
8
+
9
+ # VCR
10
+ require "vcr"
11
+
12
+ VCR.config do |c|
13
+ c.cassette_library_dir = 'spec/vcr_cassettes'
14
+ c.stub_with :webmock
15
+ end
16
+
17
+ # System Under Test
18
+ require "board_game_geek"
19
+
20
+
metadata ADDED
@@ -0,0 +1,119 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: board_game_geek
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Loren Norman
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-11-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: &2153892060 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *2153892060
25
+ - !ruby/object:Gem::Dependency
26
+ name: minitest
27
+ requirement: &2153891500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2153891500
36
+ - !ruby/object:Gem::Dependency
37
+ name: vcr
38
+ requirement: &2153891040 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :development
45
+ prerelease: false
46
+ version_requirements: *2153891040
47
+ - !ruby/object:Gem::Dependency
48
+ name: webmock
49
+ requirement: &2153890620 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *2153890620
58
+ - !ruby/object:Gem::Dependency
59
+ name: rake
60
+ requirement: &2153890200 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ! '>='
64
+ - !ruby/object:Gem::Version
65
+ version: '0'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *2153890200
69
+ description: We love board games. We love coding. Let's code against the data from
70
+ BoardGameGeek.com!
71
+ email:
72
+ - lorennorman@gmail.com
73
+ executables: []
74
+ extensions: []
75
+ extra_rdoc_files: []
76
+ files:
77
+ - .gitignore
78
+ - Gemfile
79
+ - Gemfile.lock
80
+ - Rakefile
81
+ - board_game_geek.gemspec
82
+ - lib/board_game_geek.rb
83
+ - lib/board_game_geek/version.rb
84
+ - pkg/board_game_geek-0.0.1.gem
85
+ - spec/scraper_spec.rb
86
+ - spec/spec_helper.rb
87
+ homepage: http://github.com/lorennorman/board_game_geek
88
+ licenses: []
89
+ post_install_message:
90
+ rdoc_options: []
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ segments:
100
+ - 0
101
+ hash: 2103976381804118628
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ segments:
109
+ - 0
110
+ hash: 2103976381804118628
111
+ requirements: []
112
+ rubyforge_project: board_game_geek
113
+ rubygems_version: 1.8.10
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: BoardGameGeek.com scraper
117
+ test_files:
118
+ - spec/scraper_spec.rb
119
+ - spec/spec_helper.rb