tf2r 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.ruby-gemset +1 -1
- data/.travis.yml +3 -0
- data/CHANGELOG.md +0 -0
- data/Gemfile.lock +38 -2
- data/README.md +30 -2
- data/Rakefile +8 -1
- data/lib/tf2r/scraper.rb +262 -166
- data/lib/tf2r/version.rb +1 -1
- data/spec/raffles.html +320 -0
- data/spec/scraper_spec.rb +210 -3
- data/spec/spec_helper.rb +14 -1
- data/spec/vcr/cassettes/raffles.yml +243 -0
- data/spec/vcr/cassettes/scrape_raffle_for_creator.yml +213 -0
- data/spec/vcr/cassettes/scrape_raffle_for_participants.yml +213 -0
- data/spec/vcr/cassettes/scrape_raffle_for_raffle.yml +213 -0
- data/spec/vcr/cassettes/scrape_ranks.yml +197 -0
- data/spec/vcr/cassettes/scrape_user_not_found.yml +119 -0
- data/spec/vcr/cassettes/scrape_user_real.yml +275 -0
- data/tf2r.gemspec +17 -11
- metadata +84 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cda160e8773382ca326bd42ad5f6469cbf63fd57
|
4
|
+
data.tar.gz: 3a43402a23424491b50559b27fbde5f2c72fd2a0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a76b5c1c173ca4190a1c4e6197feacfe0a00d4304aa11d07d3de6e5e0f6c9eb9e8a5e335f4b93a594d4adbc50b150c6a9111fdd0f8f683ac9c571ae67cd632d
|
7
|
+
data.tar.gz: 5d1aa559bcb08c0b1479b99d56301987289732c15431056dfab4e458814bb39767a2e149577ec4e862fc3a5e129920e519472909ef44863e17a2f30b0cf472cf
|
data/.gitignore
CHANGED
data/.ruby-gemset
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
tf2r
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
File without changes
|
data/Gemfile.lock
CHANGED
@@ -1,14 +1,26 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
tf2r (0.0
|
4
|
+
tf2r (0.1.0)
|
5
5
|
mechanize (~> 2.7)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
+
addressable (2.3.6)
|
11
|
+
cane (2.6.2)
|
12
|
+
parallel
|
10
13
|
coderay (1.1.0)
|
14
|
+
coveralls (0.7.0)
|
15
|
+
multi_json (~> 1.3)
|
16
|
+
rest-client
|
17
|
+
simplecov (>= 0.7)
|
18
|
+
term-ansicolor
|
19
|
+
thor
|
20
|
+
crack (0.4.2)
|
21
|
+
safe_yaml (~> 1.0.0)
|
11
22
|
diff-lcs (1.2.5)
|
23
|
+
docile (1.1.5)
|
12
24
|
domain_name (0.5.19)
|
13
25
|
unf (>= 0.0.5, < 1.0.0)
|
14
26
|
http-cookie (1.0.2)
|
@@ -25,16 +37,22 @@ GEM
|
|
25
37
|
method_source (0.8.2)
|
26
38
|
mime-types (2.3)
|
27
39
|
mini_portile (0.6.0)
|
40
|
+
multi_json (1.10.1)
|
28
41
|
net-http-digest_auth (1.4)
|
29
42
|
net-http-persistent (2.9.4)
|
43
|
+
netrc (0.7.7)
|
30
44
|
nokogiri (1.6.3.1)
|
31
45
|
mini_portile (= 0.6.0)
|
32
46
|
ntlm-http (0.1.1)
|
47
|
+
parallel (1.1.2)
|
33
48
|
pry (0.10.0)
|
34
49
|
coderay (~> 1.1.0)
|
35
50
|
method_source (~> 0.8.1)
|
36
51
|
slop (~> 3.4)
|
37
52
|
rake (10.3.2)
|
53
|
+
rest-client (1.7.2)
|
54
|
+
mime-types (>= 1.16, < 3.0)
|
55
|
+
netrc (~> 0.7)
|
38
56
|
rspec (3.0.0)
|
39
57
|
rspec-core (~> 3.0.0)
|
40
58
|
rspec-expectations (~> 3.0.0)
|
@@ -47,18 +65,36 @@ GEM
|
|
47
65
|
rspec-mocks (3.0.3)
|
48
66
|
rspec-support (~> 3.0.0)
|
49
67
|
rspec-support (3.0.3)
|
68
|
+
safe_yaml (1.0.3)
|
69
|
+
simplecov (0.9.0)
|
70
|
+
docile (~> 1.1.0)
|
71
|
+
multi_json
|
72
|
+
simplecov-html (~> 0.8.0)
|
73
|
+
simplecov-html (0.8.0)
|
50
74
|
slop (3.6.0)
|
75
|
+
term-ansicolor (1.3.0)
|
76
|
+
tins (~> 1.0)
|
77
|
+
thor (0.19.1)
|
78
|
+
tins (1.3.0)
|
51
79
|
unf (0.1.4)
|
52
80
|
unf_ext
|
53
81
|
unf_ext (0.0.6)
|
82
|
+
vcr (2.9.2)
|
83
|
+
webmock (1.18.0)
|
84
|
+
addressable (>= 2.3.6)
|
85
|
+
crack (>= 0.3.2)
|
54
86
|
webrobots (0.1.1)
|
55
87
|
|
56
88
|
PLATFORMS
|
57
89
|
ruby
|
58
90
|
|
59
91
|
DEPENDENCIES
|
60
|
-
bundler (~> 1.
|
92
|
+
bundler (~> 1.3)
|
93
|
+
cane (~> 2.6)
|
94
|
+
coveralls (~> 0.7)
|
61
95
|
pry (~> 0.10)
|
62
96
|
rake (~> 10.0)
|
63
97
|
rspec (~> 3.0)
|
64
98
|
tf2r!
|
99
|
+
vcr (~> 2.9)
|
100
|
+
webmock (~> 1.18)
|
data/README.md
CHANGED
@@ -1,8 +1,31 @@
|
|
1
|
-
# TF2R [
|
1
|
+
# TF2R - [tf2r.com][tf2r] interaction gem
|
2
2
|
|
3
|
+
[tf2r]: http://tf2r.com
|
4
|
+
|
5
|
+
[][gem]
|
6
|
+
[][travis]
|
7
|
+
[][gemnasium]
|
8
|
+
[][coveralls]
|
9
|
+
[][codeclimate]
|
10
|
+
|
11
|
+
[codeclimate]:https://codeclimate.com/github/justinkim/tf2r
|
12
|
+
[coveralls]: https://coveralls.io/r/justinkim/tf2r
|
13
|
+
[gem]: http://badge.fury.io/rb/tf2r
|
14
|
+
[gemnasium]: https://gemnasium.com/justinkim/tf2r
|
15
|
+
[travis]: https://travis-ci.org/justinkim/tf2r
|
16
|
+
|
17
|
+
GitHub: [https://github.com/justinkim/tf2r](https://github.com/justinkim/tf2r)
|
18
|
+
|
19
|
+
Documentation: [http://www.rubydoc.info/github/justinkim/tf2r](http://www.rubydoc.info/github/justinkim/tf2r)
|
20
|
+
|
21
|
+
Bugs: [https://github.com/justinkim/tf2r/issues](https://github.com/justinkim/tf2r/issues)
|
22
|
+
|
23
|
+
## Description
|
3
24
|
This gem provides a `TF2R::Scraper` that has the ability to scrape various pages on [TF2R](http://tf2r.com) into usable data.
|
4
25
|
|
5
|
-
Yes, this gem is [semantically versioned]
|
26
|
+
Yes, this gem is [semantically versioned][semver]!
|
27
|
+
|
28
|
+
[semver]: http://semver.org
|
6
29
|
|
7
30
|
## Installation
|
8
31
|
|
@@ -31,3 +54,8 @@ TODO: Write usage instructions here
|
|
31
54
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
32
55
|
4. Push to the branch (`git push origin my-new-feature`)
|
33
56
|
5. Create a new Pull Request
|
57
|
+
|
58
|
+
## License
|
59
|
+
Released under the ISC license. See the [LICENSE][] for further details.
|
60
|
+
|
61
|
+
[license]: LICENSE.md
|
data/Rakefile
CHANGED
data/lib/tf2r/scraper.rb
CHANGED
@@ -1,195 +1,291 @@
|
|
1
1
|
module TF2R
|
2
|
+
# @author Justin Kim
|
2
3
|
class Scraper
|
4
|
+
# Creates a Scraper. Pass values using the options hash.
|
5
|
+
#
|
6
|
+
# :user_agent a String used for the User-Agent header
|
7
|
+
# :cookies_txt a File containing cookies to load into the Mechanize agent
|
8
|
+
#
|
9
|
+
# @param opts [Hash] options to create a Scraper with
|
10
|
+
# @option opts [String] :user_agent a custom User-Agent header content
|
11
|
+
# @option opts [File] :cookies_txt a cookies.txt to load the Mechanize
|
12
|
+
# agent with
|
3
13
|
def initialize(options)
|
4
14
|
@mech = Mechanize.new { |mech|
|
5
15
|
mech.user_agent = options[:user_agent] || "TF2R::Scraper #{VERSION}"
|
6
16
|
}
|
7
17
|
|
8
|
-
|
18
|
+
load_cookies(options[:cookies_txt]) if options[:cookies_txt]
|
9
19
|
end
|
10
20
|
|
21
|
+
# Loads the Mechanize agent with cookies from a cookies.txt.
|
22
|
+
#
|
23
|
+
# Certain pages on TF2R require a session with a logged-in user.
|
24
|
+
# This requires a Netscape-style cookies.txt that contains a valid
|
25
|
+
# "session" cookie for "tf2r.com".
|
26
|
+
#
|
27
|
+
# @param cookies_txt [File] the cookies.txt file.
|
28
|
+
# @return [Mechanize::CookieJar] the CookieJar of the Mechanize agent.
|
29
|
+
def load_cookies(cookies_txt)
|
30
|
+
@mech.cookie_jar.load(cookies_txt, :cookiestxt)
|
31
|
+
end
|
32
|
+
|
33
|
+
# Fetches the page at the given URL.
|
34
|
+
#
|
35
|
+
# @param url [String] the desired URL.
|
36
|
+
# @return [Mechanize::Page] the page given by Mechanize.
|
11
37
|
def fetch(url)
|
12
38
|
@mech.get(url)
|
13
39
|
end
|
14
|
-
end
|
15
|
-
end
|
16
40
|
|
17
|
-
|
18
|
-
|
19
|
-
# This is the old Scraper from NervyPipe.
|
20
|
-
class Scraper
|
21
|
-
def initialize(cookies_txt_path)
|
22
|
-
@cookies_txt_path = cookies_txt_path
|
23
|
-
|
24
|
-
@main = Mechanize.new { |agent|
|
25
|
-
# the User-Agent field in headers
|
26
|
-
agent.user_agent = 'Jenna Bot'
|
27
|
-
}
|
28
|
-
|
29
|
-
auth_cookies(@main)
|
30
|
-
end
|
31
|
-
|
32
|
-
def auth_cookies(mech)
|
33
|
-
# Before anything, load our auth cookies into the cookie jar
|
34
|
-
# This requires a Netscape-style cookies.txt to be in the working dir
|
41
|
+
# Scrapes TF2R for all active raffles.
|
35
42
|
#
|
36
|
-
#
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
scrape_main_page
|
53
|
-
when :ranks
|
54
|
-
scrape_ranks
|
43
|
+
# See http://tf2r.com/raffles.html
|
44
|
+
#
|
45
|
+
# @example
|
46
|
+
# s.scrape_main_page #=> ['http://tf2r.com/kold.html',
|
47
|
+
# 'http://tf2r.com/knew.html',
|
48
|
+
# 'http://tf2r.com/knewest.html']
|
49
|
+
#
|
50
|
+
# @return [Array<String>] links of all active raffles in chronological
|
51
|
+
# order (oldest to newest by creation time).
|
52
|
+
def scrape_main_page
|
53
|
+
page = fetch('http://tf2r.com/raffles.html')
|
54
|
+
|
55
|
+
# All raffle links begin with 'tf2r.com/k'
|
56
|
+
raffle_links = page.links_with(href: /tf2r\.com\/k/)
|
57
|
+
raffle_links.map! { |x| x.uri.to_s }
|
58
|
+
raffle_links.reverse!
|
55
59
|
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def scrape_main_page
|
59
|
-
page = @main.get('http://tf2r.com/raffles.html')
|
60
|
-
|
61
|
-
# This regex matches all Mechanize::Page::Links on the main raffles page that are actual raffles
|
62
|
-
raffle_mech_links = page.links_with(href: /tf2r\.com\/k/)
|
63
|
-
|
64
|
-
# an array of strings, which are raffle links
|
65
|
-
raffle_links = raffle_mech_links.map { |x| x.uri.to_s }
|
66
|
-
|
67
|
-
# the array should have raffles from bottom-to-top, old-to-new
|
68
|
-
raffle_links.reverse!
|
69
|
-
end
|
70
|
-
|
71
|
-
def scrape_raffle_for_user(page)
|
72
|
-
# This is an array of all things Reag was nice enough to class "raffle_infomation"
|
73
|
-
# Reag made a typo, so the class really is "raffle_infomation"
|
74
|
-
raffle_infos = page.parser.css('.raffle_infomation')
|
75
|
-
|
76
|
-
# User information
|
77
|
-
steam_id = raffle_infos[2].css('a')[0].attributes['href'].text.split('/')[-1].split('.')[0].to_i
|
78
|
-
username = raffle_infos[2].css('a').text
|
79
|
-
avatar_link = raffle_infos[1].css('a')[0].css('img')[0].attributes['src'].text
|
80
|
-
|
81
|
-
# posrept will be nil if the Scraper's user has already voted on a user's rep in the raffle
|
82
|
-
posrepa = raffle_infos.css('.upvb').text.split
|
83
|
-
posrepa.delete('+')
|
84
|
-
posrep = posrepa[-1].to_i.to_s
|
85
|
-
|
86
|
-
negrepa = raffle_infos.css('.downvb').text.split
|
87
|
-
negrepa.delete('+')
|
88
|
-
negrep = negrepa[-1].to_i.to_s
|
89
|
-
|
90
|
-
colour = raffle_infos[2].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
|
91
|
-
# The creator of the raffle, using above
|
92
|
-
userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
|
93
|
-
end
|
94
|
-
|
95
|
-
def scrape_raffle_for_raffle(page)
|
96
|
-
# This is an array of all things Reag was nice enough to class "raffle_infomation"
|
97
|
-
# Reag made a typo, so the class really is "raffle_infomation"
|
98
|
-
raffle_infos = page.parser.css('.raffle_infomation')
|
99
|
-
|
100
|
-
# Raffle information
|
101
|
-
uri = page.uri # is a URI:HTTP
|
102
|
-
path = uri.path # is "/welcome.html" for "http://tf2r.com/welcome.html"
|
103
|
-
link_snippet = path.split('/')[1].split('.html')[0] # is 'kabc123' for 'http://tf2r.com/kabc123.html'
|
104
60
|
|
105
|
-
|
106
|
-
#
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
#
|
120
|
-
#
|
121
|
-
#
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
61
|
+
# Scrapes a raffle page for information about the creator.
|
62
|
+
#
|
63
|
+
# @example
|
64
|
+
# p = s.fetch('http://tf2r.com/kstzcbd.html')
|
65
|
+
# s.scrape_raffle_for_creator(p) #=>
|
66
|
+
# {:steam_id=>76561198061719848,
|
67
|
+
# :username=>"Yulli",
|
68
|
+
# :avatar_link=>
|
69
|
+
# "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
|
70
|
+
# :posrep=>11458,
|
71
|
+
# :negrep=>0,
|
72
|
+
# :colour=>"70b01b"}
|
73
|
+
#
|
74
|
+
# @param page [Mechanize::Page] the raffle page.
|
75
|
+
# @return [Hash] a representation of a user, the raffle creator.
|
76
|
+
# * :steam_id (+Fixnum+) — the creator's SteamID64.
|
77
|
+
# * :username (+String+) — the creator's username.
|
78
|
+
# * :avatar_link (+String+) — a link to the creator's avatar.
|
79
|
+
# * :posrep (+Fixnum+) — the creator's positive rep.
|
80
|
+
# * :negrep (+Fixnum+) — the creator's negative rep.
|
81
|
+
# * :colour (+String+) — hex colour code of the creator's username.
|
82
|
+
def scrape_raffle_for_creator(page)
|
83
|
+
# Reag classed some things "raffle_infomation" (sic); the typo is the site's.
|
84
|
+
infos = page.parser.css('.raffle_infomation')
|
85
|
+
|
86
|
+
# The main 'a' element, containing the creator's username.
|
87
|
+
user_anchor = infos[2].css('a')[0]
|
88
|
+
|
89
|
+
steam_id = extract_steam_id(user_anchor.attribute('href').to_s)
|
90
|
+
username = user_anchor.text
|
91
|
+
avatar_link = infos[1].css('img')[0].attribute('src').to_s
|
92
|
+
posrep = /(\d+)/.match(infos.css('.upvb').text)[1].to_i
|
93
|
+
negrep = /(\d+)/.match(infos.css('.downvb').text)[1].to_i
|
94
|
+
|
95
|
+
# The creator's username colour. Corresponds to rank.
|
96
|
+
colour = extract_hex_colour(user_anchor.attribute('style').to_s)
|
97
|
+
|
98
|
+
{steam_id: steam_id, username: username, avatar_link: avatar_link,
|
99
|
+
posrep: posrep, negrep: negrep, colour: colour}
|
139
100
|
end
|
140
101
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
102
|
+
# Scrapes a raffle page for information about the raffle.
|
103
|
+
#
|
104
|
+
# @example
|
105
|
+
# p = s.fetch('http://tf2r.com/kstzcbd.html')
|
106
|
+
# s.scrape_raffle_for_raffle(p) #=>
|
107
|
+
# {:link_snippet=>"kstzcbd",
|
108
|
+
# :title=>"Just one refined [1 hour]",
|
109
|
+
# :description=>"Plain and simple.",
|
110
|
+
# :start_time=>2012-10-29 09:51:45 -0400,
|
111
|
+
# :end_time=>2012-10-29 09:53:01 -0400,
|
112
|
+
# :win_chance=>0.1,
|
113
|
+
# :current_entries=>10,
|
114
|
+
# :max_entries=>10,
|
115
|
+
# :is_done=>true}
|
116
|
+
#
|
117
|
+
# @param page [Mechanize::Page] the raffle page.
|
118
|
+
# @return [Hash] a representation of the raffle.
|
119
|
+
# * :link_snippet (+String+) — the "raffle id" in the URL.
|
120
|
+
# * :title (+String+) — the raffle's title.
|
121
|
+
# * :description (+String+) — the raffle's "message".
|
122
|
+
# * :start_time (+Time+) — the creation time of the raffle.
|
123
|
+
# * :end_time (+Time+) — the projected/observed end time for the raffle.
|
124
|
+
# * :win_chance (+Float+) — a participant's chance to win the raffle.
|
125
|
+
# * :current_entries (+Fixnum+) — the current number of participants.
|
126
|
+
# * :max_entries (+Fixnum+) — the maximum number of participants allowed.
|
127
|
+
# * :is_done (+Boolean+) — whether new users can enter the raffle.
|
128
|
+
def scrape_raffle_for_raffle(page)
|
129
|
+
# Reag classed some things "raffle_infomation" (sic); the typo is the site's.
|
130
|
+
infos = page.parser.css('.raffle_infomation')
|
131
|
+
|
132
|
+
# Elements of the main raffle info table.
|
133
|
+
raffle_tds = infos[3].css('td')
|
134
|
+
|
135
|
+
# 'kabc123' for 'http://tf2r.com/kabc123.html'
|
136
|
+
link_snippet = /\/(k.+)\.html/.match(page.uri.path)[1]
|
137
|
+
title = infos[0].text.split('Title: ')[-1]
|
138
|
+
description = raffle_tds[1].text
|
139
|
+
|
140
|
+
start_time = raffle_tds[9].attribute('data-rstart-unix').to_s
|
141
|
+
start_time = DateTime.strptime(start_time, '%s').to_time
|
142
|
+
end_time = raffle_tds[11].attribute('data-rsend-unix').to_s
|
143
|
+
end_time= DateTime.strptime(end_time, '%s').to_time
|
144
|
+
|
145
|
+
win_chance = /(.+)%/.match(infos.css('#winc').text)[1].to_f / 100
|
146
|
+
|
147
|
+
entries = /(\d+)\/(\d+)/.match(infos.css('#entry').text)
|
148
|
+
current_entries = entries[1].to_i
|
149
|
+
max_entries = entries[2].to_i
|
150
|
+
|
151
|
+
text = page.parser.css('.welcome_font').css('div')[3..-1].text
|
152
|
+
is_done = end_time < Time.now ||
|
153
|
+
current_entries == max_entries ||
|
154
|
+
page.parser.css('.welcome_font')[5..-1].text.downcase.include?('winner')
|
155
|
+
|
156
|
+
{link_snippet: link_snippet, title: title, description: description,
|
157
|
+
start_time: start_time, end_time: end_time, win_chance: win_chance,
|
158
|
+
current_entries: current_entries, max_entries: max_entries,
|
159
|
+
is_done: is_done}
|
157
160
|
end
|
158
161
|
|
159
|
-
|
160
|
-
|
162
|
+
# Scrapes a raffle page for all the participants.
|
163
|
+
#
|
164
|
+
# TODO: add an example
|
165
|
+
#
|
166
|
+
# @param page [Mechanize::Page] the raffle page.
|
167
|
+
# @return [Array] contains Hashes representing each of the participants,
|
168
|
+
# in chronological order (first entered to last).
|
169
|
+
# * :steam_id (+Fixnum+) — the participant's SteamID64.
|
170
|
+
# * :username (+String+) — the participant's username.
|
171
|
+
# * :colour (+String+) — hex colour code of the participant's username.
|
172
|
+
def scrape_raffle_for_participants(page)
|
173
|
+
participants = []
|
174
|
+
participant_divs = page.parser.css('.pentry')
|
175
|
+
participant_divs.each do |participant|
|
176
|
+
user_anchor = participant.children[1]
|
177
|
+
steam_id = extract_steam_id(user_anchor.to_s)
|
178
|
+
username = participant.text
|
179
|
+
colour = extract_hex_colour(user_anchor.children[0].attribute('style'))
|
180
|
+
|
181
|
+
participants << {steam_id: steam_id, username: username, colour: colour}
|
182
|
+
end
|
183
|
+
|
184
|
+
participants.reverse!
|
185
|
+
end
|
161
186
|
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
187
|
+
# Scrapes a user page for information about the user.
|
188
|
+
#
|
189
|
+
# @example
|
190
|
+
# p = s.fetch('http://tf2r.com/user/76561198061719848.html')
|
191
|
+
# s.scrape_user(p) #=>
|
192
|
+
# {:steam_id=>76561198061719848,
|
193
|
+
# :username=>"Yulli",
|
194
|
+
# :avatar_link=>
|
195
|
+
# "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
|
196
|
+
# :posrep=>11459,
|
197
|
+
# :negrep=>0,
|
198
|
+
# :colour=>"70b01b"}
|
199
|
+
#
|
200
|
+
# @param user_page [Mechanize::Page] the user page.
|
201
|
+
# @return [Hash] a representation of the user.
|
202
|
+
# * :steam_id (+Fixnum+) — the user's SteamID64.
|
203
|
+
# * :username (+String+) — the user's username.
|
204
|
+
# * :avatar_link (+String+) — a link to the user's avatar.
|
205
|
+
# * :posrep (+Fixnum+) — the user's positive rep.
|
206
|
+
# * :negrep (+Fixnum+) — the user's negative rep.
|
207
|
+
# * :colour (+String+) — hex colour code of the user's username.
|
208
|
+
def scrape_user(user_page)
|
209
|
+
if user_page.parser.css('.profile_info').empty?
|
210
|
+
# TODO: Should raise an exception here
|
211
|
+
steam_id = extract_steam_id(user_page.uri.to_s)
|
212
|
+
username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
|
213
|
+
else
|
214
|
+
infos = user_page.parser.css('.raffle_infomation') #sic
|
215
|
+
user_anchor = infos[2].css('a')[0]
|
216
|
+
|
217
|
+
steam_id = extract_steam_id(user_page.uri.to_s)
|
218
|
+
username = /TF2R Item Raffles - (.+)/.match(user_page.title)[1]
|
219
|
+
avatar_link = infos[0].css('img')[0].attribute('src').to_s
|
220
|
+
|
221
|
+
posrep = infos.css('.upvb').text.to_i
|
222
|
+
negrep = infos.css('.downvb').text.to_i
|
223
|
+
|
224
|
+
colour = extract_hex_colour(infos[1].css('a')[0].attribute('style').to_s)
|
225
|
+
end
|
226
|
+
|
227
|
+
{steam_id: steam_id, username: username, avatar_link: avatar_link,
|
228
|
+
posrep: posrep, negrep: negrep, colour: colour}
|
229
|
+
end
|
169
230
|
|
170
|
-
|
171
|
-
|
172
|
-
|
231
|
+
# Scrapes the TF2R info page for available user ranks.
|
232
|
+
#
|
233
|
+
# See http://tf2r.com/info.html.
|
234
|
+
#
|
235
|
+
# @example
|
236
|
+
# p = s.fetch('http://tf2r.com/info.html')
|
237
|
+
# s.scrape_ranks(p) #=>
|
238
|
+
# [{:colour=>"ebe2ca", :name=>"User", :description=>"Every new or existing user has this rank."},
|
239
|
+
# {:colour=>"ffd700", :name=>"Trusted", :description=>"This rank can only be assigned on staff approval. Granted for 1,000~ Rep."},
|
240
|
+
# ...]
|
241
|
+
#
|
242
|
+
# @param info_page [Mechanize::Page] the info page.
|
243
|
+
# @return [Array] contains Hashes representing each of the ranks.
|
244
|
+
# * :name (+String+) — the rank's name.
|
245
|
+
# * :description (+String+) — the rank's description.
|
246
|
+
# * :colour (+String+) — the rank's hex colour code.
|
247
|
+
def scrape_ranks(info_page)
|
248
|
+
rank_divs = info_page.parser.css('#ranks').children
|
249
|
+
ranks = rank_divs.select { |div| div.children.size == 3 }
|
250
|
+
ranks.map { |div| extract_rank(div) }
|
251
|
+
end
|
173
252
|
|
174
|
-
|
175
|
-
negrep = raffle_infos.css('.downvb').text.to_i.to_s
|
253
|
+
private
|
176
254
|
|
177
|
-
|
255
|
+
# Extracts a rank hash from a rank div.
|
256
|
+
# Only for use by #scrape_ranks.
|
257
|
+
#
|
258
|
+
# @param div [Nokogiri::XML::Element] a div containing the rank info.
|
259
|
+
# @return [Hash] a representation of a rank as outlined in #scrape_ranks.
|
260
|
+
def extract_rank(div)
|
261
|
+
name = div.children[0].text
|
262
|
+
description = div.children[2].text
|
263
|
+
colour = extract_hex_colour(div.children[0].attribute('style').to_s)
|
264
|
+
|
265
|
+
{name: name, description: description, colour: colour}
|
178
266
|
end
|
179
267
|
|
180
|
-
|
181
|
-
|
268
|
+
# Extracts a SteamID64 from a TF2R user link.
|
269
|
+
#
|
270
|
+
# @example
|
271
|
+
# extract_steam_id('http://tf2r.com/user/76561198061719848.html')
|
272
|
+
# #=> 76561198061719848
|
273
|
+
#
|
274
|
+
# @param href [String] The full user profile link.
|
275
|
+
# @return [Fixnum] The Steam ID.
|
276
|
+
def extract_steam_id(href)
|
277
|
+
/http:\/\/tf2r.com\/user\/(\d+)\.html/.match(href)[1].to_i
|
278
|
+
end
|
182
279
|
|
183
|
-
|
184
|
-
#
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
colours = rank_divs.map {|div| div.attributes['style'].value.split('color:#')[-1].split(';')[0].downcase.chomp }
|
280
|
+
# Extracts a lowercase hex colour code.
|
281
|
+
#
|
282
|
+
# @example
|
283
|
+
# extract_hex_colour('color:#70B01B;') #=> '70b01b'
|
284
|
+
#
|
285
|
+
# @param str [String] Any string containing a hex colour code.
|
286
|
+
# @return [String] The lowercase hex colour code.
|
287
|
+
def extract_hex_colour(str)
|
288
|
+
/#(\w+)\s*;/.match(str)[1].downcase
|
289
|
+
end
|
194
290
|
end
|
195
291
|
end
|
data/lib/tf2r/version.rb
CHANGED