tf2r 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 143e9f53a0a365e8bf98ac186a8b81acc7a3f5a0
4
- data.tar.gz: f417eab178af88208690c7f257389ae84bf49cf3
3
+ metadata.gz: cda160e8773382ca326bd42ad5f6469cbf63fd57
4
+ data.tar.gz: 3a43402a23424491b50559b27fbde5f2c72fd2a0
5
5
  SHA512:
6
- metadata.gz: 3030e85ac97ab0a5726f8b7aa4f5b0e03bfacd540fcdc581d3f92428432580c50ad9cb9c7975031e68f0fd24e35816258283ca4329bc32697e1c4dffd7082776
7
- data.tar.gz: 1cf2a0eeae95a3d7b83e248039e706ccbdb96c1cd59421550433a5c5848eff185abc65b5afece44c0254ab1781239485d36b2bf08860b5de73d02a3a22e17976
6
+ metadata.gz: 5a76b5c1c173ca4190a1c4e6197feacfe0a00d4304aa11d07d3de6e5e0f6c9eb9e8a5e335f4b93a594d4adbc50b150c6a9111fdd0f8f683ac9c571ae67cd632d
7
+ data.tar.gz: 5d1aa559bcb08c0b1479b99d56301987289732c15431056dfab4e458814bb39767a2e149577ec4e862fc3a5e129920e519472909ef44863e17a2f30b0cf472cf
data/.gitignore CHANGED
@@ -11,3 +11,6 @@
11
11
  *.o
12
12
  *.a
13
13
  mkmf.log
14
+
15
+ cookies.txt
16
+ tf2r-*.gem
data/.ruby-gemset CHANGED
@@ -1 +1 @@
1
- tf2r_scraper
1
+ tf2r
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.1.2"
data/CHANGELOG.md ADDED
File without changes
data/Gemfile.lock CHANGED
@@ -1,14 +1,26 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- tf2r (0.0.1)
4
+ tf2r (0.1.0)
5
5
  mechanize (~> 2.7)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
+ addressable (2.3.6)
11
+ cane (2.6.2)
12
+ parallel
10
13
  coderay (1.1.0)
14
+ coveralls (0.7.0)
15
+ multi_json (~> 1.3)
16
+ rest-client
17
+ simplecov (>= 0.7)
18
+ term-ansicolor
19
+ thor
20
+ crack (0.4.2)
21
+ safe_yaml (~> 1.0.0)
11
22
  diff-lcs (1.2.5)
23
+ docile (1.1.5)
12
24
  domain_name (0.5.19)
13
25
  unf (>= 0.0.5, < 1.0.0)
14
26
  http-cookie (1.0.2)
@@ -25,16 +37,22 @@ GEM
25
37
  method_source (0.8.2)
26
38
  mime-types (2.3)
27
39
  mini_portile (0.6.0)
40
+ multi_json (1.10.1)
28
41
  net-http-digest_auth (1.4)
29
42
  net-http-persistent (2.9.4)
43
+ netrc (0.7.7)
30
44
  nokogiri (1.6.3.1)
31
45
  mini_portile (= 0.6.0)
32
46
  ntlm-http (0.1.1)
47
+ parallel (1.1.2)
33
48
  pry (0.10.0)
34
49
  coderay (~> 1.1.0)
35
50
  method_source (~> 0.8.1)
36
51
  slop (~> 3.4)
37
52
  rake (10.3.2)
53
+ rest-client (1.7.2)
54
+ mime-types (>= 1.16, < 3.0)
55
+ netrc (~> 0.7)
38
56
  rspec (3.0.0)
39
57
  rspec-core (~> 3.0.0)
40
58
  rspec-expectations (~> 3.0.0)
@@ -47,18 +65,36 @@ GEM
47
65
  rspec-mocks (3.0.3)
48
66
  rspec-support (~> 3.0.0)
49
67
  rspec-support (3.0.3)
68
+ safe_yaml (1.0.3)
69
+ simplecov (0.9.0)
70
+ docile (~> 1.1.0)
71
+ multi_json
72
+ simplecov-html (~> 0.8.0)
73
+ simplecov-html (0.8.0)
50
74
  slop (3.6.0)
75
+ term-ansicolor (1.3.0)
76
+ tins (~> 1.0)
77
+ thor (0.19.1)
78
+ tins (1.3.0)
51
79
  unf (0.1.4)
52
80
  unf_ext
53
81
  unf_ext (0.0.6)
82
+ vcr (2.9.2)
83
+ webmock (1.18.0)
84
+ addressable (>= 2.3.6)
85
+ crack (>= 0.3.2)
54
86
  webrobots (0.1.1)
55
87
 
56
88
  PLATFORMS
57
89
  ruby
58
90
 
59
91
  DEPENDENCIES
60
- bundler (~> 1.6)
92
+ bundler (~> 1.3)
93
+ cane (~> 2.6)
94
+ coveralls (~> 0.7)
61
95
  pry (~> 0.10)
62
96
  rake (~> 10.0)
63
97
  rspec (~> 3.0)
64
98
  tf2r!
99
+ vcr (~> 2.9)
100
+ webmock (~> 1.18)
data/README.md CHANGED
@@ -1,8 +1,31 @@
1
- # TF2R [![Code Climate](https://codeclimate.com/github/justinkim/tf2r/badges/gpa.svg)](https://codeclimate.com/github/justinkim/tf2r)
1
+ # TF2R - [tf2r.com][tf2r] interaction gem
2
2
 
3
+ [tf2r]: http://tf2r.com
4
+
5
+ [![Gem Version](http://img.shields.io/gem/v/tf2r.svg)][gem]
6
+ [![Build Status](http://img.shields.io/travis/justinkim/tf2r.svg)][travis]
7
+ [![Dependency Status](http://img.shields.io/gemnasium/justinkim/tf2r.svg)][gemnasium]
8
+ [![Coverage Status](https://img.shields.io/coveralls/justinkim/tf2r.svg)][coveralls]
9
+ [![Code Climate](http://img.shields.io/codeclimate/github/justinkim/tf2r.svg)][codeclimate]
10
+
11
+ [codeclimate]:https://codeclimate.com/github/justinkim/tf2r
12
+ [coveralls]: https://coveralls.io/r/justinkim/tf2r
13
+ [gem]: http://badge.fury.io/rb/tf2r
14
+ [gemnasium]: https://gemnasium.com/justinkim/tf2r
15
+ [travis]: https://travis-ci.org/justinkim/tf2r
16
+
17
+ GitHub: [https://github.com/justinkim/tf2r](https://github.com/justinkim/tf2r)
18
+
19
+ Documentation: [http://www.rubydoc.info/github/justinkim/tf2r](http://www.rubydoc.info/github/justinkim/tf2r)
20
+
21
+ Bugs: [https://github.com/justinkim/tf2r/issues](https://github.com/justinkim/tf2r/issues)
22
+
23
+ ## Description
3
24
  This gem provides a `TF2R::Scraper` that has the ability to scrape various pages on [TF2R](http://tf2r.com) into usable data.
4
25
 
5
- Yes, this gem is [semantically versioned](http://semver.org/)!
26
+ Yes, this gem is [semantically versioned][semver]!
27
+
28
+ [semver]: http://semver.org
6
29
 
7
30
  ## Installation
8
31
 
@@ -31,3 +54,8 @@ TODO: Write usage instructions here
31
54
  3. Commit your changes (`git commit -am 'Add some feature'`)
32
55
  4. Push to the branch (`git push origin my-new-feature`)
33
56
  5. Create a new Pull Request
57
+
58
+ ## License
59
+ Released under the ISC license. See the [LICENSE][] for further details.
60
+
61
+ [license]: LICENSE.md
data/Rakefile CHANGED
@@ -1,2 +1,9 @@
1
- require "bundler/gem_tasks"
1
+ require 'bundler/gem_tasks'
2
+ require 'rspec/core/rake_task'
2
3
 
4
+ desc 'Run specs'
5
+ RSpec::Core::RakeTask.new
6
+
7
+ desc 'Default: run specs'
8
+ task :default => :spec
9
+ task :test => :spec
data/lib/tf2r/scraper.rb CHANGED
@@ -1,195 +1,291 @@
1
1
  module TF2R
2
+ # @author Justin Kim
2
3
  class Scraper
4
+ # Creates a Scraper. Pass values using the options hash.
5
+ #
6
+ # :user_agent a String used for the User-Agent header
7
+ # :cookies_txt a File containing cookies to load into the Mechanize agent
8
+ #
9
+ # @param opts [Hash] options to create a Scraper with
10
+ # @option opts [String] :user_agent a custom User-Agent header content
11
+ # @option opts [File] :cookies_txt a cookies.txt to load the Mechanize
12
+ # agent with
3
13
  def initialize(options)
4
14
  @mech = Mechanize.new { |mech|
5
15
  mech.user_agent = options[:user_agent] || "TF2R::Scraper #{VERSION}"
6
16
  }
7
17
 
8
- @mech.cookie_jar.load(options[:cookies_txt], :cookiestxt) if options[:cookies_txt]
18
+ load_cookies(options[:cookies_txt]) if options[:cookies_txt]
9
19
  end
10
20
 
21
+ # Loads the Mechanize agent with cookies from a cookies.txt.
22
+ #
23
+ # Certain pages on TF2R require a session with a logged-in user.
24
+ # This requires a Netscape-style cookies.txt that contains a valid
25
+ # "session" cookie for "tf2r.com".
26
+ #
27
+ # @param cookies_txt [File] the cookies.txt file.
28
+ # @return [Mechanize::CookieJar] the CookieJar of the Mechanize agent.
29
+ def load_cookies(cookies_txt)
30
+ @mech.cookie_jar.load(cookies_txt, :cookiestxt)
31
+ end
32
+
33
+ # Fetches the page at the given URL.
34
+ #
35
+ # @param url [String] the desired URL.
36
+ # @return [Mechanize::Page] the page given by Mechanize.
11
37
  def fetch(url)
12
38
  @mech.get(url)
13
39
  end
14
- end
15
- end
16
40
 
17
- __END__
18
-
19
- # This is the old Scraper from NervyPipe.
20
- class Scraper
21
- def initialize(cookies_txt_path)
22
- @cookies_txt_path = cookies_txt_path
23
-
24
- @main = Mechanize.new { |agent|
25
- # the User-Agent field in headers
26
- agent.user_agent = 'Jenna Bot'
27
- }
28
-
29
- auth_cookies(@main)
30
- end
31
-
32
- def auth_cookies(mech)
33
- # Before anything, load our auth cookies into the cookie jar
34
- # This requires a Netscape-style cookies.txt to be in the working dir
41
+ # Scrapes TF2R for all active raffles.
35
42
  #
36
- # cookies.txt must include at least a valid "session" cookie from tf2r.com
37
- mech.cookie_jar.load_cookiestxt(@cookies_txt_path)
38
- end
39
-
40
- # Simply return the Mechanize::Page for a url
41
- def fetch(url)
42
- @main.get(url)
43
- end
44
-
45
- def run(type)
46
- case type
47
- when :raffle
48
- scrape_raffle(@main.get 'http://tf2r.com/kblf84f.html')
49
- when :user
50
- scrape_user(@main.get 'http://tf2r.com/user/76561198061719848.html')
51
- when :main
52
- scrape_main_page
53
- when :ranks
54
- scrape_ranks
43
+ # See http://tf2r.com/raffles.html
44
+ #
45
+ # @example
46
+ # s.scrape_main_page #=> ['http://tf2r.com/kold.html',
47
+ # 'http://tf2r.com/knew.html',
48
+ # 'http://tf2r.com/knewest.html']
49
+ #
50
+ # @return [Array] String links of all active raffles in chronological
51
+ # order (oldest to newest by creation time).
52
+ def scrape_main_page
53
+ page = fetch('http://tf2r.com/raffles.html')
54
+
55
+ # All raffle links begin with 'tf2r.com/k'
56
+ raffle_links = page.links_with(href: /tf2r\.com\/k/)
57
+ raffle_links.map! { |x| x.uri.to_s }
58
+ raffle_links.reverse!
55
59
  end
56
- end
57
-
58
- def scrape_main_page
59
- page = @main.get('http://tf2r.com/raffles.html')
60
-
61
- # This regex matches all Mechanize::Page::Links on the main raffles page that are actual raffles
62
- raffle_mech_links = page.links_with(href: /tf2r\.com\/k/)
63
-
64
- # an array of strings, which are raffle links
65
- raffle_links = raffle_mech_links.map { |x| x.uri.to_s }
66
-
67
- # the array should have raffles from bottom-to-top, old-to-new
68
- raffle_links.reverse!
69
- end
70
-
71
- def scrape_raffle_for_user(page)
72
- # This is an array of all things Reag was nice enough to class "raffle_infomation"
73
- # Reag made a typo, so the class really is "raffle_infomation"
74
- raffle_infos = page.parser.css('.raffle_infomation')
75
-
76
- # User information
77
- steam_id = raffle_infos[2].css('a')[0].attributes['href'].text.split('/')[-1].split('.')[0].to_i
78
- username = raffle_infos[2].css('a').text
79
- avatar_link = raffle_infos[1].css('a')[0].css('img')[0].attributes['src'].text
80
-
81
- # posrept will be nil if the Scraper's user has already voted on a user's rep in the raffle
82
- posrepa = raffle_infos.css('.upvb').text.split
83
- posrepa.delete('+')
84
- posrep = posrepa[-1].to_i.to_s
85
-
86
- negrepa = raffle_infos.css('.downvb').text.split
87
- negrepa.delete('+')
88
- negrep = negrepa[-1].to_i.to_s
89
-
90
- colour = raffle_infos[2].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
91
- # The creator of the raffle, using above
92
- userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
93
- end
94
-
95
- def scrape_raffle_for_raffle(page)
96
- # This is an array of all things Reag was nice enough to class "raffle_infomation"
97
- # Reag made a typo, so the class really is "raffle_infomation"
98
- raffle_infos = page.parser.css('.raffle_infomation')
99
-
100
- # Raffle information
101
- uri = page.uri # is a URI:HTTP
102
- path = uri.path # is "/welcome.html" for "http://tf2r.com/welcome.html"
103
- link_snippet = path.split('/')[1].split('.html')[0] # is 'kabc123' for 'http://tf2r.com/kabc123.html'
104
60
 
105
- title = raffle_infos[0].text.split('Title: ')[-1]
106
- # Lots of info in a single table
107
- raffle_tds = raffle_infos[3].css('td')
108
- description = raffle_tds[1].text
109
-
110
- start_time_string = raffle_tds[9].text
111
- start_time = DateTime.strptime(start_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
112
- end_time_string = raffle_tds[11].text
113
- end_time = DateTime.strptime(end_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
114
-
115
- win_chance_pre_round = raffle_tds[5].text.to_f / 100 # also #winc
116
- win_chance = win_chance_pre_round.round(5)
117
-
118
- entries = raffle_tds[7].text # also #entry
119
- # Entries looks like "42/123", as "current/max"
120
- # Split by slash, multiple assignment to array with elements mapped to integers
121
- # Equivalent to a = b[0].to_i; c = b[1].to_i
122
- current_entries, max_entries = entries.split('/').map { |x| x.to_i }
123
-
124
- is_done = end_time <= Time.now || current_entries == max_entries || page.parser.css('.welcome_font').text.include?('No winners') || page.parser.css('.welcome_font').text.include?('Winner(s):')
125
-
126
- rafflehash = {link_snippet: link_snippet, title: title, description: description, start_time: start_time, end_time: end_time,
127
- win_chance: win_chance, current_entries: current_entries, max_entries: max_entries, is_done: is_done}
128
- end
129
-
130
- def scrape_raffle_for_participants(page)
131
- participants = []
132
- participant_divs = page.parser.css('.pentry')
133
- participant_divs.each do |participant|
134
- steam_id = participant.css('a')[-1].attributes['href'].text.split('/')[-1].split('.')[0].to_i
135
- username = participant.text
136
- colour = participant.css('a')[-1].attributes['style'].text.split('#')[-1].split(';')[0].downcase.chomp
137
-
138
- participants << {steam_id: steam_id, username: username, colour: colour}
61
+ # Scrapes a raffle page for information about the creator.
62
+ #
63
+ # @example
64
+ # p = s.fetch('http://tf2r.com/kstzcbd.html')
65
+ # s.scrape_raffle_for_creator(p) #=>
66
+ # {:steam_id=>76561198061719848,
67
+ # :username=>"Yulli",
68
+ # :avatar_link=>
69
+ # "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
70
+ # :posrep=>11458,
71
+ # :negrep=>0,
72
+ # :colour=>"70b01b"}
73
+ #
74
+ # @param page [Mechanize::Page] the raffle page.
75
+ # @return [Hash] a representation of a user, the raffle creator.
76
+ # * :steam_id (+Fixnum+) the creator's SteamID64.
77
+ # * :username (+String+) the creator's username.
78
+ # * :avatar_link (+String+) a link to the creator's avatar.
79
+ # * :posrep (+Fixnum+) — the creator's positive rep.
80
+ # * :negrep (+Fixnum+) the creator's negative rep.
81
+ # * :colour (+String+) — hex colour code of the creator's username.
82
+ def scrape_raffle_for_creator(page)
83
+ # Reag classed some things "raffle_infomation" (sic); the misspelling is the real class name.
84
+ infos = page.parser.css('.raffle_infomation')
85
+
86
+ # The main 'a' element, containing the creator's username.
87
+ user_anchor = infos[2].css('a')[0]
88
+
89
+ steam_id = extract_steam_id(user_anchor.attribute('href').to_s)
90
+ username = user_anchor.text
91
+ avatar_link = infos[1].css('img')[0].attribute('src').to_s
92
+ posrep = /(\d+)/.match(infos.css('.upvb').text)[1].to_i
93
+ negrep = /(\d+)/.match(infos.css('.downvb').text)[1].to_i
94
+
95
+ # The creator's username colour. Corresponds to rank.
96
+ colour = extract_hex_colour(user_anchor.attribute('style').to_s)
97
+
98
+ {steam_id: steam_id, username: username, avatar_link: avatar_link,
99
+ posrep: posrep, negrep: negrep, colour: colour}
139
100
  end
140
101
 
141
- participants.uniq.reverse
142
- end
143
-
144
- def scrape_raffle(page, portions = :all)
145
- userhash, rafflehash, participants = {}, {}, []
146
-
147
- case portions
148
- when :core
149
- userhash = scrape_raffle_for_user(page)
150
- rafflehash = scrape_raffle_for_raffle(page)
151
- when :participants
152
- participants = scrape_raffle_for_participants(page)
153
- else
154
- userhash = scrape_raffle_for_user(page)
155
- rafflehash = scrape_raffle_for_raffle(page)
156
- participants = scrape_raffle_for_participants(page)
102
+ # Scrapes a raffle page for information about the raffle.
103
+ #
104
+ # @example
105
+ # p = s.fetch('http://tf2r.com/kstzcbd.html')
106
+ # s.scrape_raffle_for_raffle(p) #=>
107
+ # {:link_snippet=>"kstzcbd",
108
+ # :title=>"Just one refined [1 hour]",
109
+ # :description=>"Plain and simple.",
110
+ # :start_time=>2012-10-29 09:51:45 -0400,
111
+ # :end_time=>2012-10-29 09:53:01 -0400,
112
+ # :win_chance=>0.1,
113
+ # :current_entries=>10,
114
+ # :max_entries=>10,
115
+ # :is_done=>true}
116
+ #
117
+ # @param page [Mechanize::Page] the raffle page.
118
+ # @return [Hash] a representation of the raffle.
119
+ # * :link_snippet (+String+) — the "raffle id" in the URL.
120
+ # * :title (+String+) — the raffle's title.
121
+ # * :description (+String+) — the raffle's "message".
122
+ # * :start_time (+Time+) — the creation time of the raffle.
123
+ # * :end_time (+Time+) — the projected/observed end time for the raffle.
124
+ # * :win_chance (+Float+) — a participant's chance to win the raffle.
125
+ # * :current_entries (+Fixnum+) — the current number of participants.
126
+ # * :max_entries (+Fixnum+) — the maximum number of participants allowed.
127
+ # * :is_done (+Boolean+) — whether new users can enter the raffle.
128
+ def scrape_raffle_for_raffle(page)
129
+ # Reag classed some things "raffle_infomation" (sic); the misspelling is the real class name.
130
+ infos = page.parser.css('.raffle_infomation')
131
+
132
+ # Elements of the main raffle info table.
133
+ raffle_tds = infos[3].css('td')
134
+
135
+ # 'kabc123' for 'http://tf2r.com/kabc123.html'
136
+ link_snippet = /\/(k.+)\.html/.match(page.uri.path)[1]
137
+ title = infos[0].text.split('Title: ')[-1]
138
+ description = raffle_tds[1].text
139
+
140
+ start_time = raffle_tds[9].attribute('data-rstart-unix').to_s
141
+ start_time = DateTime.strptime(start_time, '%s').to_time
142
+ end_time = raffle_tds[11].attribute('data-rsend-unix').to_s
143
+ end_time= DateTime.strptime(end_time, '%s').to_time
144
+
145
+ win_chance = /(.+)%/.match(infos.css('#winc').text)[1].to_f / 100
146
+
147
+ entries = /(\d+)\/(\d+)/.match(infos.css('#entry').text)
148
+ current_entries = entries[1].to_i
149
+ max_entries = entries[2].to_i
150
+
151
+ text = page.parser.css('.welcome_font').css('div')[3..-1].text
152
+ is_done = end_time < Time.now ||
153
+ current_entries == max_entries ||
154
+ page.parser.css('.welcome_font')[5..-1].text.downcase.include?('winner')
155
+
156
+ {link_snippet: link_snippet, title: title, description: description,
157
+ start_time: start_time, end_time: end_time, win_chance: win_chance,
158
+ current_entries: current_entries, max_entries: max_entries,
159
+ is_done: is_done}
157
160
  end
158
161
 
159
- [userhash, rafflehash, participants]
160
- end
162
+ # Scrapes a raffle page for all the participants.
163
+ #
164
+ # TODO: add an example
165
+ #
166
+ # @param page [Mechanize::Page] the raffle page.
167
+ # @return [Array] contains Hashes representing each of the participants,
168
+ # in chronological order (first entered to last).
169
+ # * :steam_id (+Fixnum+) — the participant's SteamID64.
170
+ # * :username (+String+) — the participant's username.
171
+ # * :colour (+String+) — hex colour code of the participant's username.
172
+ def scrape_raffle_for_participants(page)
173
+ participants = []
174
+ participant_divs = page.parser.css('.pentry')
175
+ participant_divs.each do |participant|
176
+ user_anchor = participant.children[1]
177
+ steam_id = extract_steam_id(user_anchor.to_s)
178
+ username = participant.text
179
+ colour = extract_hex_colour(user_anchor.children[0].attribute('style'))
180
+
181
+ participants << {steam_id: steam_id, username: username, colour: colour}
182
+ end
183
+
184
+ participants.reverse!
185
+ end
161
186
 
162
- def scrape_user(user_page)
163
- if user_page.parser.css('.profile_info').empty?
164
- username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
165
- steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
166
- else
167
- pp user_page.parser.css('.profile_info')
168
- raffle_infos = user_page.parser.css('.raffle_infomation') # sic
187
+ # Scrapes a user page for information about the user.
188
+ #
189
+ # @example
190
+ # p = s.fetch('http://tf2r.com/user/76561198061719848.html')
191
+ # s.scrape_user(p) #=>
192
+ # {:steam_id=>76561198061719848,
193
+ # :username=>"Yulli",
194
+ # :avatar_link=>
195
+ # "http://media.steampowered.com/steamcommunity/public/images/avatars/bc/bc9dc4302d23f2e2f37f59c59f29c27dbc8cade6_full.jpg",
196
+ # :posrep=>11459,
197
+ # :negrep=>0,
198
+ # :colour=>"70b01b"}
199
+ #
200
+ # @param user_page [Mechanize::Page] the user page.
201
+ # @return [Hash] a representation of the user.
202
+ # * :steam_id (+Fixnum+) — the user's SteamID64.
203
+ # * :username (+String+) — the user's username.
204
+ # * :avatar_link (+String+) — a link to the user's avatar.
205
+ # * :posrep (+Fixnum+) — the user's positive rep.
206
+ # * :negrep (+Fixnum+) — the user's negative rep.
207
+ # * :colour (+String+) — hex colour code of the user's username.
208
+ def scrape_user(user_page)
209
+ if user_page.parser.css('.profile_info').empty?
210
+ # TODO: Should raise an exception here
211
+ steam_id = extract_steam_id(user_page.uri.to_s)
212
+ username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
213
+ else
214
+ infos = user_page.parser.css('.raffle_infomation') #sic
215
+ user_anchor = infos[2].css('a')[0]
216
+
217
+ steam_id = extract_steam_id(user_page.uri.to_s)
218
+ username = /TF2R Item Raffles - (.+)/.match(user_page.title)[1]
219
+ avatar_link = infos[0].css('img')[0].attribute('src').to_s
220
+
221
+ posrep = infos.css('.upvb').text.to_i
222
+ negrep = infos.css('.downvb').text.to_i
223
+
224
+ colour = extract_hex_colour(infos[1].css('a')[0].attribute('style').to_s)
225
+ end
226
+
227
+ {steam_id: steam_id, username: username, avatar_link: avatar_link,
228
+ posrep: posrep, negrep: negrep, colour: colour}
229
+ end
169
230
 
170
- steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
171
- username = user_page.parser.title.split('TF2R Item Raffles - ')[-1]
172
- avatar_link = raffle_infos[0].css('img')[0].attributes['src'].text
231
+ # Scrapes the TF2R info page for available user ranks.
232
+ #
233
+ # See http://tf2r.com/info.html.
234
+ #
235
+ # @example
236
+ # p = s.fetch('http://tf2r.com/info.html')
237
+ # s.scrape_ranks(p) #=>
238
+ # [{:colour=>"ebe2ca", :name=>"User", :description=>"Every new or existing user has this rank."},
239
+ # {:colour=>"ffd700", :name=>"Trusted", :description=>"This rank can only be assigned on staff approval. Granted for 1,000~ Rep."},
240
+ # ...]
241
+ #
242
+ # @param info_page [Mechanize::Page] the info page.
243
+ # @return [Array] contains Hashes representing each of the ranks.
244
+ # * :name (+String+) — the rank's name.
245
+ # * :description (+String+) — the rank's description.
246
+ # * :colour (+String+) — the rank's hex colour code.
247
+ def scrape_ranks(info_page)
248
+ rank_divs = info_page.parser.css('#ranks').children
249
+ ranks = rank_divs.select { |div| div.children.size == 3 }
250
+ ranks.map { |div| extract_rank(div) }
251
+ end
173
252
 
174
- posrep = raffle_infos.css('.upvb').text.to_i.to_s
175
- negrep = raffle_infos.css('.downvb').text.to_i.to_s
253
+ private
176
254
 
177
- colour = raffle_infos[1].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
255
+ # Extracts a rank hash from a rank div.
256
+ # Only for use by #scrape_ranks.
257
+ #
258
+ # @param div [Nokogiri::XML::Element] a div containing the rank info.
259
+ # @return [Hash] a representation of a rank as outlined in #scrape_ranks.
260
+ def extract_rank(div)
261
+ name = div.children[0].text
262
+ description = div.children[2].text
263
+ colour = extract_hex_colour(div.children[0].attribute('style').to_s)
264
+
265
+ {name: name, description: description, colour: colour}
178
266
  end
179
267
 
180
- userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
181
- end
268
+ # Extracts a SteamID64 from a TF2R user link.
269
+ #
270
+ # @example
271
+ # extract_steam_id('http://tf2r.com/user/76561198061719848.html')
272
+ # #=> 76561198061719848
273
+ #
274
+ # @param href [String] The full user profile link.
275
+ # @return [Fixnum] The Steam ID.
276
+ def extract_steam_id(href)
277
+ /http:\/\/tf2r.com\/user\/(\d+)\.html/.match(href)[1].to_i
278
+ end
182
279
 
183
- def scrape_ranks
184
- # This scrapes the info page for the various ranks that exist
185
- page = @main.get('http://tf2r.com/info.html')
186
-
187
- ranks_div = page.parser.css('#ranks')
188
- divs = ranks_div.css('div')
189
- rank_divs = []
190
- divs.each { |div|
191
- rank_divs << div unless div.attributes['style'].nil? || !(div.attributes['style'].value.include? 'color')
192
- }
193
- colours = rank_divs.map {|div| div.attributes['style'].value.split('color:#')[-1].split(';')[0].downcase.chomp }
280
+ # Extracts a lowercase hex colour code.
281
+ #
282
+ # @example
283
+ # extract_hex_colour('color:#70B01B;') #=> '70b01b'
284
+ #
285
+ # @param str [String] Any string containing a hex colour code.
286
+ # @return [String] The lowercase hex colour code.
287
+ def extract_hex_colour(str)
288
+ /#(\w+)\s*;/.match(str)[1].downcase
289
+ end
194
290
  end
195
291
  end
data/lib/tf2r/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module TF2R
2
- VERSION = "0.0.1"
2
+ VERSION = '0.1.0'
3
3
  end