board_game_geek 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +28 -0
- data/Rakefile +14 -0
- data/board_game_geek.gemspec +27 -0
- data/lib/board_game_geek/version.rb +3 -0
- data/lib/board_game_geek.rb +56 -0
- data/pkg/board_game_geek-0.0.1.gem +0 -0
- data/spec/scraper_spec.rb +62 -0
- data/spec/spec_helper.rb +20 -0
- metadata +119 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
board_game_geek (0.0.1)
|
5
|
+
nokogiri
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: http://rubygems.org/
|
9
|
+
specs:
|
10
|
+
addressable (2.2.6)
|
11
|
+
crack (0.3.1)
|
12
|
+
minitest (2.8.0)
|
13
|
+
nokogiri (1.5.0)
|
14
|
+
rake (0.9.2.2)
|
15
|
+
vcr (1.11.3)
|
16
|
+
webmock (1.7.7)
|
17
|
+
addressable (~> 2.2, > 2.2.5)
|
18
|
+
crack (>= 0.1.7)
|
19
|
+
|
20
|
+
PLATFORMS
|
21
|
+
ruby
|
22
|
+
|
23
|
+
DEPENDENCIES
|
24
|
+
board_game_geek!
|
25
|
+
minitest
|
26
|
+
rake
|
27
|
+
vcr
|
28
|
+
webmock
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Gem development/management: build, install and release tasks come from
# Bundler's gem helper.
require 'bundler'
Bundler::GemHelper.install_tasks

# Spec runner: `rake spec` loads every spec/*_spec.rb file with the spec
# directory added to the load path.
require 'rake/testtask'
Rake::TestTask.new(:spec) do |spec_task|
  spec_task.libs.push("spec")
  spec_task.pattern = "spec/*_spec.rb"
  spec_task.verbose = true
end

# Running bare `rake` runs the specs.
task default: :spec
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "board_game_geek/version"
|
4
|
+
|
5
|
+
# Gem specification for board_game_geek. The version is read from
# BoardGameGeek::VERSION (lib/board_game_geek/version.rb), which the lines
# above this block put on the load path and require.
Gem::Specification.new do |s|
  s.name        = "board_game_geek"
  s.version     = BoardGameGeek::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Loren Norman"]
  s.email       = ["lorennorman@gmail.com"]
  s.homepage    = "http://github.com/lorennorman/board_game_geek"
  s.summary     = %q{BoardGameGeek.com scraper}
  s.description = %q{We love board games. We love coding. Let's code against the data from BoardGameGeek.com!}

  s.rubyforge_project = "board_game_geek"

  # Runtime dependency: HTML parsing.
  s.add_dependency "nokogiri"
  # Development-only dependencies: test framework, HTTP recording/stubbing, task runner.
  s.add_development_dependency "minitest"
  s.add_development_dependency "vcr"
  s.add_development_dependency "webmock"
  s.add_development_dependency "rake"

  # `git ls-files` returns every tracked file, which includes previously built
  # gems under pkg/ when they have been committed. Filter those out so a
  # release never ships an older copy of itself inside the package.
  s.files         = `git ls-files`.split("\n").reject { |path| path.start_with?("pkg/") }
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  # s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
require "nokogiri"
|
3
|
+
require "open-uri"
|
4
|
+
require "ostruct"
|
5
|
+
|
6
|
+
module BoardGameGeek
  # Scrapes the ranked board game listing pages of BoardGameGeek.com.
  class Scraper
    # Prefix of a listing page URL; the page number and ".html" are appended.
    RANKED_GAMES_URI = 'http://boardgamegeek.com/browse/boardgame/page/'
    # Number of game rows taken from one full listing page.
    GAMES_PER_PAGE = 100

    # Returns the first +how_many+ games of the ranked listing.
    #
    # One HTTP request is made per listing page; only the last page may be
    # consumed partially. Each game is an OpenStruct with the attributes
    # +name+, +ranking+, +rating+, +release_date+ and +image_url+ (all taken
    # from the page as strings; +image_url+ is nil when a row has no
    # thumbnail image).
    #
    # @param how_many [Integer] number of games wanted
    # @return [Array<OpenStruct>] games in listing order; empty when no page
    #   is needed (e.g. +how_many+ is 0)
    def self.top_games(how_many)
      full_pages, remainder = break_down_total(how_many)

      Array.new.tap do |games|
        (1..full_pages).each do |page_number|
          # Every page contributes a full batch except, possibly, the last one.
          wanted = GAMES_PER_PAGE
          wanted = remainder if page_number == full_pages && remainder > 0

          rows = fetch_page(page_number).css('table#collectionitems tr')
          rows.each_with_index do |game_row, idx|
            next if idx == 0       # the first row is skipped (table header)
            break if idx > wanted  # enough rows taken from this page

            games << parse_game_row(game_row)
          end
        end
      end
    end

    # Splits a requested total into the number of pages to fetch and the
    # number of games to take from the last page.
    #
    # @param total [Integer] number of games wanted
    # @return [Array(Integer, Integer)] +[pages, remainder]+; a remainder of 0
    #   means the last page is taken in full
    def self.break_down_total(total)
      [(total.to_f / GAMES_PER_PAGE).ceil, total % GAMES_PER_PAGE]
    end

    # Downloads and parses one listing page.
    #
    # Uses URI#open (provided by open-uri) with a block instead of
    # Kernel#open: Kernel#open no longer accepts URLs on Ruby 3.0+, and the
    # block form closes the response stream once parsing is done.
    def self.fetch_page(page_number)
      uri = URI.parse("#{RANKED_GAMES_URI}#{page_number}.html")
      uri.open { |io| Nokogiri::HTML(io) }
    end

    # Builds the game record for one table row of the listing.
    def self.parse_game_row(game_row)
      cells = game_row.css("td").map(&:inner_text).map(&:strip)

      # The third cell holds the name and the release year on separate lines.
      name_and_date = cells[2].to_s.split("\n")

      # A row without a thumbnail must not abort the whole scrape.
      thumbnail = game_row.css(".collection_thumbnail img").first
      image_src = thumbnail && thumbnail["src"]

      OpenStruct.new(name: name_and_date[0],
                     ranking: cells[0],
                     rating: cells[3],
                     # [1..-2] drops the first and last character around the
                     # year (parentheses in the listing markup).
                     release_date: name_and_date[2].to_s.strip[1..-2],
                     # Swap the "_mt" thumbnail suffix for "_t".
                     image_url: image_src && image_src.sub("_mt", "_t"))
    end

    private_class_method :fetch_page, :parse_game_row
  end
end
|
55
|
+
|
56
|
+
# Short alias for the BoardGameGeek namespace (the specs refer to BGG::Scraper).
BGG = BoardGameGeek
|
Binary file
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
describe BGG::Scraper do
  subject { BGG::Scraper }

  # Runs after every example and ejects the current VCR cassette.
  after { VCR.eject_cassette }

  describe "breaking a total into pages and remainder" do
    # requested total => [expected page count, expected remainder]
    expectations = {
      1    => [1, 1],
      50   => [1, 50],
      100  => [1, 0],
      101  => [2, 1],
      199  => [2, 99],
      200  => [2, 0],
      201  => [3, 1],
      999  => [10, 99],
      1000 => [10, 0],
      1001 => [11, 1]
    }

    expectations.each_pair do |total, (pages, remainder)|
      it "breaks a total of #{total} into #{pages} pages and #{remainder} remainder" do
        actual_pages, actual_remainder = subject.break_down_total(total)
        actual_pages.must_be :==, pages
        actual_remainder.must_be :==, remainder
      end
    end
  end

  describe "a game record" do
    before { VCR.insert_cassette('board_game_geek_1') }

    let(:first_game) { subject.top_games(1).first }

    it "exposes some handy attributes" do
      [:name, :image_url, :rating, :ranking, :release_date].each do |attribute|
        first_game.must_respond_to attribute
      end
    end

    it "correctly parses the image source" do
      first_game.image_url.must_be :==, "http://cf.geekdo-images.com/images/pic361592_t.jpg"
    end
  end

  # Each bulk example uses the cassette named after the number of games requested.
  [100, 1000].each do |count|
    it "gets the top #{count} games" do
      VCR.insert_cassette("board_game_geek_#{count}")
      subject.top_games(count).length.must_be :==, count
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
|
3
|
+
# MiniTest
|
4
|
+
require "minitest/autorun"
|
5
|
+
# require 'minitest/reporters'
|
6
|
+
# MiniTest::Unit.runner = MiniTest::SuiteRunner.new
|
7
|
+
# MiniTest::Unit.runner.reporters << MiniTest::Reporters::SpecReporter.new
|
8
|
+
|
9
|
+
# VCR
|
10
|
+
require "vcr"
|
11
|
+
|
12
|
+
# Cassettes are stored under spec/vcr_cassettes; WebMock is the stubbing library.
VCR.config do |vcr_config|
  vcr_config.cassette_library_dir = 'spec/vcr_cassettes'
  vcr_config.stub_with(:webmock)
end
|
16
|
+
|
17
|
+
# System Under Test
|
18
|
+
require "board_game_geek"
|
19
|
+
|
20
|
+
|
metadata
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: board_game_geek
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Loren Norman
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-11-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: &2153892060 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *2153892060
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: minitest
|
27
|
+
requirement: &2153891500 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *2153891500
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: vcr
|
38
|
+
requirement: &2153891040 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *2153891040
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: webmock
|
49
|
+
requirement: &2153890620 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *2153890620
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rake
|
60
|
+
requirement: &2153890200 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *2153890200
|
69
|
+
description: We love board games. We love coding. Let's code against the data from
|
70
|
+
BoardGameGeek.com!
|
71
|
+
email:
|
72
|
+
- lorennorman@gmail.com
|
73
|
+
executables: []
|
74
|
+
extensions: []
|
75
|
+
extra_rdoc_files: []
|
76
|
+
files:
|
77
|
+
- .gitignore
|
78
|
+
- Gemfile
|
79
|
+
- Gemfile.lock
|
80
|
+
- Rakefile
|
81
|
+
- board_game_geek.gemspec
|
82
|
+
- lib/board_game_geek.rb
|
83
|
+
- lib/board_game_geek/version.rb
|
84
|
+
- pkg/board_game_geek-0.0.1.gem
|
85
|
+
- spec/scraper_spec.rb
|
86
|
+
- spec/spec_helper.rb
|
87
|
+
homepage: http://github.com/lorennorman/board_game_geek
|
88
|
+
licenses: []
|
89
|
+
post_install_message:
|
90
|
+
rdoc_options: []
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
segments:
|
100
|
+
- 0
|
101
|
+
hash: 2103976381804118628
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
103
|
+
none: false
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
108
|
+
segments:
|
109
|
+
- 0
|
110
|
+
hash: 2103976381804118628
|
111
|
+
requirements: []
|
112
|
+
rubyforge_project: board_game_geek
|
113
|
+
rubygems_version: 1.8.10
|
114
|
+
signing_key:
|
115
|
+
specification_version: 3
|
116
|
+
summary: BoardGameGeek.com scraper
|
117
|
+
test_files:
|
118
|
+
- spec/scraper_spec.rb
|
119
|
+
- spec/spec_helper.rb
|