tf2r 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.ruby-gemset +1 -1
- data/.travis.yml +3 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile.lock +38 -2
- data/README.md +30 -2
- data/Rakefile +8 -1
- data/lib/tf2r/scraper.rb +262 -166
- data/lib/tf2r/version.rb +1 -1
- data/spec/raffles.html +320 -0
- data/spec/scraper_spec.rb +210 -3
- data/spec/spec_helper.rb +14 -1
- data/spec/vcr/cassettes/raffles.yml +243 -0
- data/spec/vcr/cassettes/scrape_raffle_for_creator.yml +213 -0
- data/spec/vcr/cassettes/scrape_raffle_for_participants.yml +213 -0
- data/spec/vcr/cassettes/scrape_raffle_for_raffle.yml +213 -0
- data/spec/vcr/cassettes/scrape_ranks.yml +197 -0
- data/spec/vcr/cassettes/scrape_user_not_found.yml +119 -0
- data/spec/vcr/cassettes/scrape_user_real.yml +275 -0
- data/tf2r.gemspec +17 -11
- metadata +84 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cda160e8773382ca326bd42ad5f6469cbf63fd57
|
4
|
+
data.tar.gz: 3a43402a23424491b50559b27fbde5f2c72fd2a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a76b5c1c173ca4190a1c4e6197feacfe0a00d4304aa11d07d3de6e5e0f6c9eb9e8a5e335f4b93a594d4adbc50b150c6a9111fdd0f8f683ac9c571ae67cd632d
|
7
|
+
data.tar.gz: 5d1aa559bcb08c0b1479b99d56301987289732c15431056dfab4e458814bb39767a2e149577ec4e862fc3a5e129920e519472909ef44863e17a2f30b0cf472cf
|
data/.gitignore
CHANGED
data/.ruby-gemset
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
tf2r
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
File without changes
|
data/Gemfile.lock
CHANGED
@@ -1,14 +1,26 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tf2r (0.0
|
4
|
+
tf2r (0.1.0)
|
5
5
|
mechanize (~> 2.7)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
+
addressable (2.3.6)
|
11
|
+
cane (2.6.2)
|
12
|
+
parallel
|
10
13
|
coderay (1.1.0)
|
14
|
+
coveralls (0.7.0)
|
15
|
+
multi_json (~> 1.3)
|
16
|
+
rest-client
|
17
|
+
simplecov (>= 0.7)
|
18
|
+
term-ansicolor
|
19
|
+
thor
|
20
|
+
crack (0.4.2)
|
21
|
+
safe_yaml (~> 1.0.0)
|
11
22
|
diff-lcs (1.2.5)
|
23
|
+
docile (1.1.5)
|
12
24
|
domain_name (0.5.19)
|
13
25
|
unf (>= 0.0.5, < 1.0.0)
|
14
26
|
http-cookie (1.0.2)
|
@@ -25,16 +37,22 @@ GEM
|
|
25
37
|
method_source (0.8.2)
|
26
38
|
mime-types (2.3)
|
27
39
|
mini_portile (0.6.0)
|
40
|
+
multi_json (1.10.1)
|
28
41
|
net-http-digest_auth (1.4)
|
29
42
|
net-http-persistent (2.9.4)
|
43
|
+
netrc (0.7.7)
|
30
44
|
nokogiri (1.6.3.1)
|
31
45
|
mini_portile (= 0.6.0)
|
32
46
|
ntlm-http (0.1.1)
|
47
|
+
parallel (1.1.2)
|
33
48
|
pry (0.10.0)
|
34
49
|
coderay (~> 1.1.0)
|
35
50
|
method_source (~> 0.8.1)
|
36
51
|
slop (~> 3.4)
|
37
52
|
rake (10.3.2)
|
53
|
+
rest-client (1.7.2)
|
54
|
+
mime-types (>= 1.16, < 3.0)
|
55
|
+
netrc (~> 0.7)
|
38
56
|
rspec (3.0.0)
|
39
57
|
rspec-core (~> 3.0.0)
|
40
58
|
rspec-expectations (~> 3.0.0)
|
@@ -47,18 +65,36 @@ GEM
|
|
47
65
|
rspec-mocks (3.0.3)
|
48
66
|
rspec-support (~> 3.0.0)
|
49
67
|
rspec-support (3.0.3)
|
68
|
+
safe_yaml (1.0.3)
|
69
|
+
simplecov (0.9.0)
|
70
|
+
docile (~> 1.1.0)
|
71
|
+
multi_json
|
72
|
+
simplecov-html (~> 0.8.0)
|
73
|
+
simplecov-html (0.8.0)
|
50
74
|
slop (3.6.0)
|
75
|
+
term-ansicolor (1.3.0)
|
76
|
+
tins (~> 1.0)
|
77
|
+
thor (0.19.1)
|
78
|
+
tins (1.3.0)
|
51
79
|
unf (0.1.4)
|
52
80
|
unf_ext
|
53
81
|
unf_ext (0.0.6)
|
82
|
+
vcr (2.9.2)
|
83
|
+
webmock (1.18.0)
|
84
|
+
addressable (>= 2.3.6)
|
85
|
+
crack (>= 0.3.2)
|
54
86
|
webrobots (0.1.1)
|
55
87
|
|
56
88
|
PLATFORMS
|
57
89
|
ruby
|
58
90
|
|
59
91
|
DEPENDENCIES
|
60
|
-
bundler (~> 1.
|
92
|
+
bundler (~> 1.3)
|
93
|
+
cane (~> 2.6)
|
94
|
+
coveralls (~> 0.7)
|
61
95
|
pry (~> 0.10)
|
62
96
|
rake (~> 10.0)
|
63
97
|
rspec (~> 3.0)
|
64
98
|
tf2r!
|
99
|
+
vcr (~> 2.9)
|
100
|
+
webmock (~> 1.18)
|
data/README.md
CHANGED
@@ -1,8 +1,31 @@
|
|
1
|
-
# TF2R [
|
1
|
+
# TF2R - [tf2r.com][tf2r] interaction gem
|
2
2
|
|
3
|
+
[tf2r]: http://tf2r.com
|
4
|
+
|
5
|
+
[![Gem Version](http://img.shields.io/gem/v/tf2r.svg)][gem]
|
6
|
+
[![Build Status](http://img.shields.io/travis/justinkim/tf2r.svg)][travis]
|
7
|
+
[![Dependency Status](http://img.shields.io/gemnasium/justinkim/tf2r.svg)][gemnasium]
|
8
|
+
[![Coverage Status](https://img.shields.io/coveralls/justinkim/tf2r.svg)][coveralls]
|
9
|
+
[![Code Climate](http://img.shields.io/codeclimate/github/justinkim/tf2r.svg)][codeclimate]
|
10
|
+
|
11
|
+
[codeclimate]:https://codeclimate.com/github/justinkim/tf2r
|
12
|
+
[coveralls]: https://coveralls.io/r/justinkim/tf2r
|
13
|
+
[gem]: http://badge.fury.io/rb/tf2r
|
14
|
+
[gemnasium]: https://gemnasium.com/justinkim/tf2r
|
15
|
+
[travis]: https://travis-ci.org/justinkim/tf2r
|
16
|
+
|
17
|
+
GitHub: [https://github.com/justinkim/tf2r](https://github.com/justinkim/tf2r)
|
18
|
+
|
19
|
+
Documentation: [http://www.rubydoc.info/github/justinkim/tf2r](http://www.rubydoc.info/github/justinkim/tf2r)
|
20
|
+
|
21
|
+
Bugs: [https://github.com/justinkim/tf2r/issues](https://github.com/justinkim/tf2r/issues)
|
22
|
+
|
23
|
+
## Description
|
3
24
|
This gem provides a `TF2R::Scraper` that has the ability to scrape various pages on [TF2R](http://tf2r.com) into usable data.
|
4
25
|
|
5
|
-
Yes, this gem is [semantically versioned]
|
26
|
+
Yes, this gem is [semantically versioned][semver]!
|
27
|
+
|
28
|
+
[semver]: http://semver.org
|
6
29
|
|
7
30
|
## Installation
|
8
31
|
|
@@ -31,3 +54,8 @@ TODO: Write usage instructions here
|
|
31
54
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
32
55
|
4. Push to the branch (`git push origin my-new-feature`)
|
33
56
|
5. Create a new Pull Request
|
57
|
+
|
58
|
+
## License
|
59
|
+
Released under the ISC license. See the [LICENSE][] for further details.
|
60
|
+
|
61
|
+
[license]: LICENSE.md
|
data/Rakefile
CHANGED
data/lib/tf2r/scraper.rb
CHANGED
@@ -1,195 +1,291 @@
|
|
1
1
|
module TF2R
|
2
|
+
# @author Justin Kim
|
2
3
|
class Scraper
|
4
|
+
# Creates a Scraper. Pass values using the options hash.
|
5
|
+
#
|
6
|
+
# :user_agent a String used for the User-Agent header
|
7
|
+
# :cookies_txt a File containing cookies to load into the Mechanize agent
|
8
|
+
#
|
9
|
+
# @param options [Hash] options to create a Scraper with
|
10
|
+
# @option options [String] :user_agent a custom User-Agent header content
|
11
|
+
# @option options [File] :cookies_txt a cookies.txt to load the Mechanize
|
12
|
+
# agent with
|
3
13
|
def initialize(options)
|
4
14
|
@mech = Mechanize.new { |mech|
|
5
15
|
mech.user_agent = options[:user_agent] || "TF2R::Scraper #{VERSION}"
|
6
16
|
}
|
7
17
|
|
8
|
-
|
18
|
+
load_cookies(options[:cookies_txt]) if options[:cookies_txt]
|
9
19
|
end
|
10
20
|
|
21
|
+
# Loads the Mechanize agent with cookies from a cookies.txt.
|
22
|
+
#
|
23
|
+
# Certain pages on TF2R require a session with a logged-in user.
|
24
|
+
# This requires a Netscape-style cookies.txt that contains a valid
|
25
|
+
# "session" cookie for "tf2r.com".
|
26
|
+
#
|
27
|
+
# @param cookies_txt [File] the cookies.txt file.
|
28
|
+
# @return [Mechanize::CookieJar] the CookieJar of the Mechanize agent.
|
29
|
+
def load_cookies(cookies_txt)
|
30
|
+
@mech.cookie_jar.load(cookies_txt, :cookiestxt)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Fetches the page at the given URL.
|
34
|
+
#
|
35
|
+
# @param url [String] the desired URL.
|
36
|
+
# @return [Mechanize::Page] the page given by Mechanize.
|
11
37
|
def fetch(url)
|
12
38
|
@mech.get(url)
|
13
39
|
end
|
14
|
-
end
|
15
|
-
end
|
16
40
|
|
17
|
-
|
18
|
-
|
19
|
-
# This is the old Scraper from NervyPipe.
|
20
|
-
class Scraper
|
21
|
-
def initialize(cookies_txt_path)
|
22
|
-
@cookies_txt_path = cookies_txt_path
|
23
|
-
|
24
|
-
@main = Mechanize.new { |agent|
|
25
|
-
# the User-Agent field in headers
|
26
|
-
agent.user_agent = 'Jenna Bot'
|
27
|
-
}
|
28
|
-
|
29
|
-
auth_cookies(@main)
|
30
|
-
end
|
31
|
-
|
32
|
-
def auth_cookies(mech)
|
33
|
-
# Before anything, load our auth cookies into the cookie jar
|
34
|
-
# This requires a Netscape-style cookies.txt to be in the working dir
|
41
|
+
# Scrapes TF2R for all active raffles.
|
35
42
|
#
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
scrape_main_page
|
53
|
-
when :ranks
|
54
|
-
scrape_ranks
|
43
|
+
# See http://tf2r.com/raffles.html
|
44
|
+
#
|
45
|
+
# @example
|
46
|
+
# s.scrape_main_page #=> ['http://tf2r.com/kold.html',
|
47
|
+
# 'http://tf2r.com/knew.html',
|
48
|
+
# 'http://tf2r.com/knewest.html']
|
49
|
+
#
|
50
|
+
# @return [Array<String>] links of all active raffles in chronological
|
51
|
+
# order (oldest to newest by creation time).
|
52
|
+
def scrape_main_page
|
53
|
+
page = fetch('http://tf2r.com/raffles.html')
|
54
|
+
|
55
|
+
# All raffle links begin with 'tf2r.com/k'
|
56
|
+
raffle_links = page.links_with(href: /tf2r\.com\/k/)
|
57
|
+
raffle_links.map! { |x| x.uri.to_s }
|
58
|
+
raffle_links.reverse!
|
55
59
|
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def scrape_main_page
|
59
|
-
page = @main.get('http://tf2r.com/raffles.html')
|
60
|
-
|
61
|
-
# This regex matches all Mechanize::Page::Links on the main raffles page that are actual raffles
|
62
|
-
raffle_mech_links = page.links_with(href: /tf2r\.com\/k/)
|
63
|
-
|
64
|
-
# an array of strings, which are raffle links
|
65
|
-
raffle_links = raffle_mech_links.map { |x| x.uri.to_s }
|
66
|
-
|
67
|
-
# the array should have raffles from bottom-to-top, old-to-new
|
68
|
-
raffle_links.reverse!
|
69
|
-
end
|
70
|
-
|
71
|
-
def scrape_raffle_for_user(page)
|
72
|
-
# This is an array of all things Reag was nice enough to class "raffle_infomation"
|
73
|
-
# Reag made a typo, so the class really is "raffle_infomation"
|
74
|
-
raffle_infos = page.parser.css('.raffle_infomation')
|
75
|
-
|
76
|
-
# User information
|
77
|
-
steam_id = raffle_infos[2].css('a')[0].attributes['href'].text.split('/')[-1].split('.')[0].to_i
|
78
|
-
username = raffle_infos[2].css('a').text
|
79
|
-
avatar_link = raffle_infos[1].css('a')[0].css('img')[0].attributes['src'].text
|
80
|
-
|
81
|
-
# posrept will be nil if the Scraper's user has already voted on a user's rep in the raffle
|
82
|
-
posrepa = raffle_infos.css('.upvb').text.split
|
83
|
-
posrepa.delete('+')
|
84
|
-
posrep = posrepa[-1].to_i.to_s
|
85
|
-
|
86
|
-
negrepa = raffle_infos.css('.downvb').text.split
|
87
|
-
negrepa.delete('+')
|
88
|
-
negrep = negrepa[-1].to_i.to_s
|
89
|
-
|
90
|
-
colour = raffle_infos[2].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
|
91
|
-
# The creator of the raffle, using above
|
92
|
-
userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
|
93
|
-
end
|
94
|
-
|
95
|
-
def scrape_raffle_for_raffle(page)
|
96
|
-
# This is an array of all things Reag was nice enough to class "raffle_infomation"
|
97
|
-
# Reag made a typo, so the class really is "raffle_infomation"
|
98
|
-
raffle_infos = page.parser.css('.raffle_infomation')
|
99
|
-
|
100
|
-
# Raffle information
|
101
|
-
uri = page.uri # is a URI:HTTP
|
102
|
-
path = uri.path # is "/welcome.html" for "http://tf2r.com/welcome.html"
|
103
|
-
link_snippet = path.split('/')[1].split('.html')[0] # is 'kabc123' for 'http://tf2r.com/kabc123.html'
|
104
60
|
|
105
|
-
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
61
|
+
# Scrapes a raffle page for information about the creator.
|
62
|
+
#
|
63
|
+
# @example
|
64
|
+
# p = s.fetch('http://tf2r.com/kstzcbd.html')
|
65
|
+
# s.scrape_raffle_for_creator(p) #=>
|
66
|
+
# {:steam_id=>76561198061719848,
|
67
|
+
# :username=>"Yulli",
|
68
|
+
# :avatar_link=>
|
69
|
+
# "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
|
70
|
+
# :posrep=>11458,
|
71
|
+
# :negrep=>0,
|
72
|
+
# :colour=>"70b01b"}
|
73
|
+
#
|
74
|
+
# @param page [Mechanize::Page] the raffle page.
|
75
|
+
# @return [Hash] a representation of a user, the raffle creator.
|
76
|
+
# * :steam_id (+Fixnum+) — the creator's SteamID64.
|
77
|
+
# * :username (+String+) — the creator's username.
|
78
|
+
# * :avatar_link (+String+) — a link to the creator's avatar.
|
79
|
+
# * :posrep (+Fixnum+) — the creator's positive rep.
|
80
|
+
# * :negrep (+Fixnum+) — the creator's negative rep.
|
81
|
+
# * :colour (+String+) — hex colour code of the creator's username.
|
82
|
+
def scrape_raffle_for_creator(page)
|
83
|
+
# Reag classed some things "raffle_infomation" (sic); the misspelling is the site's actual class name.
|
84
|
+
infos = page.parser.css('.raffle_infomation')
|
85
|
+
|
86
|
+
# The main 'a' element, containing the creator's username.
|
87
|
+
user_anchor = infos[2].css('a')[0]
|
88
|
+
|
89
|
+
steam_id = extract_steam_id(user_anchor.attribute('href').to_s)
|
90
|
+
username = user_anchor.text
|
91
|
+
avatar_link = infos[1].css('img')[0].attribute('src').to_s
|
92
|
+
posrep = /(\d+)/.match(infos.css('.upvb').text)[1].to_i
|
93
|
+
negrep = /(\d+)/.match(infos.css('.downvb').text)[1].to_i
|
94
|
+
|
95
|
+
# The creator's username colour. Corresponds to rank.
|
96
|
+
colour = extract_hex_colour(user_anchor.attribute('style').to_s)
|
97
|
+
|
98
|
+
{steam_id: steam_id, username: username, avatar_link: avatar_link,
|
99
|
+
posrep: posrep, negrep: negrep, colour: colour}
|
139
100
|
end
|
140
101
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
102
|
+
# Scrapes a raffle page for information about the raffle.
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# p = s.fetch('http://tf2r.com/kstzcbd.html')
|
106
|
+
# s.scrape_raffle_for_raffle(p) #=>
|
107
|
+
# {:link_snippet=>"kstzcbd",
|
108
|
+
# :title=>"Just one refined [1 hour]",
|
109
|
+
# :description=>"Plain and simple.",
|
110
|
+
# :start_time=>2012-10-29 09:51:45 -0400,
|
111
|
+
# :end_time=>2012-10-29 09:53:01 -0400,
|
112
|
+
# :win_chance=>0.1,
|
113
|
+
# :current_entries=>10,
|
114
|
+
# :max_entries=>10,
|
115
|
+
# :is_done=>true}
|
116
|
+
#
|
117
|
+
# @param page [Mechanize::Page] the raffle page.
|
118
|
+
# @return [Hash] a representation of the raffle.
|
119
|
+
# * :link_snippet (+String+) — the "raffle id" in the URL.
|
120
|
+
# * :title (+String+) — the raffle's title.
|
121
|
+
# * :description (+String+) — the raffle's "message".
|
122
|
+
# * :start_time (+Time+) — the creation time of the raffle.
|
123
|
+
# * :end_time (+Time+) — the projected/observed end time for the raffle.
|
124
|
+
# * :win_chance (+Float+) — a participant's chance to win the raffle.
|
125
|
+
# * :current_entries (+Fixnum+) — the current number of participants.
|
126
|
+
# * :max_entries (+Fixnum+) — the maximum number of participants allowed.
|
127
|
+
# * :is_done (+Boolean+) — whether new users can enter the raffle.
|
128
|
+
def scrape_raffle_for_raffle(page)
|
129
|
+
# Reag classed some things "raffle_infomation" (sic); the misspelling is the site's actual class name.
|
130
|
+
infos = page.parser.css('.raffle_infomation')
|
131
|
+
|
132
|
+
# Elements of the main raffle info table.
|
133
|
+
raffle_tds = infos[3].css('td')
|
134
|
+
|
135
|
+
# 'kabc123' for 'http://tf2r.com/kabc123.html'
|
136
|
+
link_snippet = /\/(k.+)\.html/.match(page.uri.path)[1]
|
137
|
+
title = infos[0].text.split('Title: ')[-1]
|
138
|
+
description = raffle_tds[1].text
|
139
|
+
|
140
|
+
start_time = raffle_tds[9].attribute('data-rstart-unix').to_s
|
141
|
+
start_time = DateTime.strptime(start_time, '%s').to_time
|
142
|
+
end_time = raffle_tds[11].attribute('data-rsend-unix').to_s
|
143
|
+
end_time= DateTime.strptime(end_time, '%s').to_time
|
144
|
+
|
145
|
+
win_chance = /(.+)%/.match(infos.css('#winc').text)[1].to_f / 100
|
146
|
+
|
147
|
+
entries = /(\d+)\/(\d+)/.match(infos.css('#entry').text)
|
148
|
+
current_entries = entries[1].to_i
|
149
|
+
max_entries = entries[2].to_i
|
150
|
+
|
151
|
+
text = page.parser.css('.welcome_font').css('div')[3..-1].text
|
152
|
+
is_done = end_time < Time.now ||
|
153
|
+
current_entries == max_entries ||
|
154
|
+
page.parser.css('.welcome_font')[5..-1].text.downcase.include?('winner')
|
155
|
+
|
156
|
+
{link_snippet: link_snippet, title: title, description: description,
|
157
|
+
start_time: start_time, end_time: end_time, win_chance: win_chance,
|
158
|
+
current_entries: current_entries, max_entries: max_entries,
|
159
|
+
is_done: is_done}
|
157
160
|
end
|
158
161
|
|
159
|
-
|
160
|
-
|
162
|
+
# Scrapes a raffle page for all the participants.
|
163
|
+
#
|
164
|
+
# TODO: add an example
|
165
|
+
#
|
166
|
+
# @param page [Mechanize::Page] the raffle page.
|
167
|
+
# @return [Array] contains Hashes representing each of the participants,
|
168
|
+
# in chronological order (first entered to last).
|
169
|
+
# * :steam_id (+Fixnum+) — the participant's SteamID64.
|
170
|
+
# * :username (+String+) — the participant's username.
|
171
|
+
# * :colour (+String+) — hex colour code of the participant's username.
|
172
|
+
def scrape_raffle_for_participants(page)
|
173
|
+
participants = []
|
174
|
+
participant_divs = page.parser.css('.pentry')
|
175
|
+
participant_divs.each do |participant|
|
176
|
+
user_anchor = participant.children[1]
|
177
|
+
steam_id = extract_steam_id(user_anchor.to_s)
|
178
|
+
username = participant.text
|
179
|
+
colour = extract_hex_colour(user_anchor.children[0].attribute('style'))
|
180
|
+
|
181
|
+
participants << {steam_id: steam_id, username: username, colour: colour}
|
182
|
+
end
|
183
|
+
|
184
|
+
participants.reverse!
|
185
|
+
end
|
161
186
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
187
|
+
# Scrapes a user page for information about the user.
|
188
|
+
#
|
189
|
+
# @example
|
190
|
+
# p = s.fetch('http://tf2r.com/user/76561198061719848.html')
|
191
|
+
# s.scrape_user(p) #=>
|
192
|
+
# {:steam_id=>76561198061719848,
|
193
|
+
# :username=>"Yulli",
|
194
|
+
# :avatar_link=>
|
195
|
+
# "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
|
196
|
+
# :posrep=>11459,
|
197
|
+
# :negrep=>0,
|
198
|
+
# :colour=>"70b01b"}
|
199
|
+
#
|
200
|
+
# @param user_page [Mechanize::Page] the user page.
|
201
|
+
# @return [Hash] a representation of the user.
|
202
|
+
# * :steam_id (+Fixnum+) — the user's SteamID64.
|
203
|
+
# * :username (+String+) — the user's username.
|
204
|
+
# * :avatar_link (+String+) — a link to the user's avatar.
|
205
|
+
# * :posrep (+Fixnum+) — the user's positive rep.
|
206
|
+
# * :negrep (+Fixnum+) — the user's negative rep.
|
207
|
+
# * :colour (+String+) — hex colour code of the user's username.
|
208
|
+
def scrape_user(user_page)
|
209
|
+
if user_page.parser.css('.profile_info').empty?
|
210
|
+
# TODO: Should raise an exception here
|
211
|
+
steam_id = extract_steam_id(user_page.uri.to_s)
|
212
|
+
username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
|
213
|
+
else
|
214
|
+
infos = user_page.parser.css('.raffle_infomation') #sic
|
215
|
+
user_anchor = infos[2].css('a')[0]
|
216
|
+
|
217
|
+
steam_id = extract_steam_id(user_page.uri.to_s)
|
218
|
+
username = /TF2R Item Raffles - (.+)/.match(user_page.title)[1]
|
219
|
+
avatar_link = infos[0].css('img')[0].attribute('src').to_s
|
220
|
+
|
221
|
+
posrep = infos.css('.upvb').text.to_i
|
222
|
+
negrep = infos.css('.downvb').text.to_i
|
223
|
+
|
224
|
+
colour = extract_hex_colour(infos[1].css('a')[0].attribute('style').to_s)
|
225
|
+
end
|
226
|
+
|
227
|
+
{steam_id: steam_id, username: username, avatar_link: avatar_link,
|
228
|
+
posrep: posrep, negrep: negrep, colour: colour}
|
229
|
+
end
|
169
230
|
|
170
|
-
|
171
|
-
|
172
|
-
|
231
|
+
# Scrapes the TF2R info page for available user ranks.
|
232
|
+
#
|
233
|
+
# See http://tf2r.com/info.html.
|
234
|
+
#
|
235
|
+
# @example
|
236
|
+
# p = s.fetch('http://tf2r.com/info.html')
|
237
|
+
# s.scrape_ranks(p) #=>
|
238
|
+
# [{:colour=>"ebe2ca", :name=>"User", :description=>"Every new or existing user has this rank."},
|
239
|
+
# {:colour=>"ffd700", :name=>"Trusted", :description=>"This rank can only be assigned on staff approval. Granted for 1,000~ Rep."},
|
240
|
+
# ...]
|
241
|
+
#
|
242
|
+
# @param info_page [Mechanize::Page] the info page.
|
243
|
+
# @return [Array] contains Hashes representing each of the ranks.
|
244
|
+
# * :name (+String+) — the rank's name.
|
245
|
+
# * :description (+String+) — the rank's description.
|
246
|
+
# * :colour (+String+) — the rank's hex colour code.
|
247
|
+
def scrape_ranks(info_page)
|
248
|
+
rank_divs = info_page.parser.css('#ranks').children
|
249
|
+
ranks = rank_divs.select { |div| div.children.size == 3 }
|
250
|
+
ranks.map { |div| extract_rank(div) }
|
251
|
+
end
|
173
252
|
|
174
|
-
|
175
|
-
negrep = raffle_infos.css('.downvb').text.to_i.to_s
|
253
|
+
private
|
176
254
|
|
177
|
-
|
255
|
+
# Extracts a rank hash from a rank div.
|
256
|
+
# Only for use by #scrape_ranks.
|
257
|
+
#
|
258
|
+
# @param div [Nokogiri::XML::Element] a div containing the rank info.
|
259
|
+
# @return [Hash] a representation of a rank as outlined in #scrape_ranks.
|
260
|
+
def extract_rank(div)
|
261
|
+
name = div.children[0].text
|
262
|
+
description = div.children[2].text
|
263
|
+
colour = extract_hex_colour(div.children[0].attribute('style').to_s)
|
264
|
+
|
265
|
+
{name: name, description: description, colour: colour}
|
178
266
|
end
|
179
267
|
|
180
|
-
|
181
|
-
|
268
|
+
# Extracts a SteamID64 from a TF2R user link.
|
269
|
+
#
|
270
|
+
# @example
|
271
|
+
# extract_steam_id('http://tf2r.com/user/76561198061719848.html')
|
272
|
+
# #=> 76561198061719848
|
273
|
+
#
|
274
|
+
# @param href [String] The full user profile link.
|
275
|
+
# @return [Fixnum] The Steam ID.
|
276
|
+
def extract_steam_id(href)
|
277
|
+
/http:\/\/tf2r.com\/user\/(\d+)\.html/.match(href)[1].to_i
|
278
|
+
end
|
182
279
|
|
183
|
-
|
184
|
-
#
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
colours = rank_divs.map {|div| div.attributes['style'].value.split('color:#')[-1].split(';')[0].downcase.chomp }
|
280
|
+
# Extracts a lowercase hex colour code.
|
281
|
+
#
|
282
|
+
# @example
|
283
|
+
# extract_hex_colour('color:#70B01B;') #=> '70b01b'
|
284
|
+
#
|
285
|
+
# @param str [String] Any string containing a hex colour code.
|
286
|
+
# @return [String] The lowercase hex colour code.
|
287
|
+
def extract_hex_colour(str)
|
288
|
+
/#(\w+)\s*;/.match(str)[1].downcase
|
289
|
+
end
|
194
290
|
end
|
195
291
|
end
|
data/lib/tf2r/version.rb
CHANGED