tf2r 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 143e9f53a0a365e8bf98ac186a8b81acc7a3f5a0
4
+ data.tar.gz: f417eab178af88208690c7f257389ae84bf49cf3
5
+ SHA512:
6
+ metadata.gz: 3030e85ac97ab0a5726f8b7aa4f5b0e03bfacd540fcdc581d3f92428432580c50ad9cb9c7975031e68f0fd24e35816258283ca4329bc32697e1c4dffd7082776
7
+ data.tar.gz: 1cf2a0eeae95a3d7b83e248039e706ccbdb96c1cd59421550433a5c5848eff185abc65b5afece44c0254ab1781239485d36b2bf08860b5de73d02a3a22e17976
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.bundle
10
+ *.so
11
+ *.o
12
+ *.a
13
+ mkmf.log
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ tf2r_scraper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tf2r.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tf2r (0.0.1)
5
+ mechanize (~> 2.7)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ coderay (1.1.0)
11
+ diff-lcs (1.2.5)
12
+ domain_name (0.5.19)
13
+ unf (>= 0.0.5, < 1.0.0)
14
+ http-cookie (1.0.2)
15
+ domain_name (~> 0.5)
16
+ mechanize (2.7.3)
17
+ domain_name (~> 0.5, >= 0.5.1)
18
+ http-cookie (~> 1.0)
19
+ mime-types (~> 2.0)
20
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
21
+ net-http-persistent (~> 2.5, >= 2.5.2)
22
+ nokogiri (~> 1.4)
23
+ ntlm-http (~> 0.1, >= 0.1.1)
24
+ webrobots (>= 0.0.9, < 0.2)
25
+ method_source (0.8.2)
26
+ mime-types (2.3)
27
+ mini_portile (0.6.0)
28
+ net-http-digest_auth (1.4)
29
+ net-http-persistent (2.9.4)
30
+ nokogiri (1.6.3.1)
31
+ mini_portile (= 0.6.0)
32
+ ntlm-http (0.1.1)
33
+ pry (0.10.0)
34
+ coderay (~> 1.1.0)
35
+ method_source (~> 0.8.1)
36
+ slop (~> 3.4)
37
+ rake (10.3.2)
38
+ rspec (3.0.0)
39
+ rspec-core (~> 3.0.0)
40
+ rspec-expectations (~> 3.0.0)
41
+ rspec-mocks (~> 3.0.0)
42
+ rspec-core (3.0.3)
43
+ rspec-support (~> 3.0.0)
44
+ rspec-expectations (3.0.3)
45
+ diff-lcs (>= 1.2.0, < 2.0)
46
+ rspec-support (~> 3.0.0)
47
+ rspec-mocks (3.0.3)
48
+ rspec-support (~> 3.0.0)
49
+ rspec-support (3.0.3)
50
+ slop (3.6.0)
51
+ unf (0.1.4)
52
+ unf_ext
53
+ unf_ext (0.0.6)
54
+ webrobots (0.1.1)
55
+
56
+ PLATFORMS
57
+ ruby
58
+
59
+ DEPENDENCIES
60
+ bundler (~> 1.6)
61
+ pry (~> 0.10)
62
+ rake (~> 10.0)
63
+ rspec (~> 3.0)
64
+ tf2r!
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2014, Justin Kim <yulli@yulli.org>
2
+
3
+ Permission to use, copy, modify, and/or distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # TF2R [![Code Climate](https://codeclimate.com/github/justinkim/tf2r/badges/gpa.svg)](https://codeclimate.com/github/justinkim/tf2r)
2
+
3
+ This gem provides a `TF2R::Scraper` that has the ability to scrape various pages on [TF2R](http://tf2r.com) into usable data.
4
+
5
+ Yes, this gem is [semantically versioned](http://semver.org/)!
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'tf2r'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install tf2r
22
+
23
+ ## Usage
24
+
25
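+ Build a scraper with `TF2R::Scraper.new(options)`, where `options` is a hash that may contain `:user_agent` and `:cookies_txt` (pass `{}` to use the defaults), then call `scraper.fetch(url)` to retrieve a page through Mechanize.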
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it ( https://github.com/justinkim/tf2r/fork )
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,195 @@
1
+ module TF2R
2
+ class Scraper
3
+ def initialize(options)
4
+ @mech = Mechanize.new { |mech|
5
+ mech.user_agent = options[:user_agent] || "TF2R::Scraper #{VERSION}"
6
+ }
7
+
8
+ @mech.cookie_jar.load(options[:cookies_txt], :cookiestxt) if options[:cookies_txt]
9
+ end
10
+
11
+ def fetch(url)
12
+ @mech.get(url)
13
+ end
14
+ end
15
+ end
16
+
17
+ __END__
18
+
19
+ # This is the old Scraper from NervyPipe.
20
+ class Scraper
21
+ def initialize(cookies_txt_path)
22
+ @cookies_txt_path = cookies_txt_path
23
+
24
+ @main = Mechanize.new { |agent|
25
+ # the User-Agent field in headers
26
+ agent.user_agent = 'Jenna Bot'
27
+ }
28
+
29
+ auth_cookies(@main)
30
+ end
31
+
32
+ def auth_cookies(mech)
33
+ # Before anything, load our auth cookies into the cookie jar
34
+ # This requires a Netscape-style cookies.txt to be in the working dir
35
+ #
36
+ # cookies.txt must include at least a valid "session" cookie from tf2r.com
37
+ mech.cookie_jar.load_cookiestxt(@cookies_txt_path)
38
+ end
39
+
40
+ # Simply return the Mechanize::Page for a url
41
+ def fetch(url)
42
+ @main.get(url)
43
+ end
44
+
45
+ def run(type)
46
+ case type
47
+ when :raffle
48
+ scrape_raffle(@main.get 'http://tf2r.com/kblf84f.html')
49
+ when :user
50
+ scrape_user(@main.get 'http://tf2r.com/user/76561198061719848.html')
51
+ when :main
52
+ scrape_main_page
53
+ when :ranks
54
+ scrape_ranks
55
+ end
56
+ end
57
+
58
+ def scrape_main_page
59
+ page = @main.get('http://tf2r.com/raffles.html')
60
+
61
+ # This regex matches all Mechanize::Page::Links on the main raffles page that are actual raffles
62
+ raffle_mech_links = page.links_with(href: /tf2r\.com\/k/)
63
+
64
+ # an array of strings, which are raffle links
65
+ raffle_links = raffle_mech_links.map { |x| x.uri.to_s }
66
+
67
+ # the array should have raffles from bottom-to-top, old-to-new
68
+ raffle_links.reverse!
69
+ end
70
+
71
+ def scrape_raffle_for_user(page)
72
+ # This is an array of all things Reag was nice enough to class "raffle_infomation"
73
+ # Reag made a typo, so the class really is "raffle_infomation"
74
+ raffle_infos = page.parser.css('.raffle_infomation')
75
+
76
+ # User information
77
+ steam_id = raffle_infos[2].css('a')[0].attributes['href'].text.split('/')[-1].split('.')[0].to_i
78
+ username = raffle_infos[2].css('a').text
79
+ avatar_link = raffle_infos[1].css('a')[0].css('img')[0].attributes['src'].text
80
+
81
+ # posrep will be "0" if the Scraper's user has already voted on a user's rep in the raffle
82
+ posrepa = raffle_infos.css('.upvb').text.split
83
+ posrepa.delete('+')
84
+ posrep = posrepa[-1].to_i.to_s
85
+
86
+ negrepa = raffle_infos.css('.downvb').text.split
87
+ negrepa.delete('+')
88
+ negrep = negrepa[-1].to_i.to_s
89
+
90
+ colour = raffle_infos[2].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
91
+ # The creator of the raffle, using above
92
+ userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
93
+ end
94
+
95
+ def scrape_raffle_for_raffle(page)
96
+ # This is an array of all things Reag was nice enough to class "raffle_infomation"
97
+ # Reag made a typo, so the class really is "raffle_infomation"
98
+ raffle_infos = page.parser.css('.raffle_infomation')
99
+
100
+ # Raffle information
101
+ uri = page.uri # is a URI:HTTP
102
+ path = uri.path # is "/welcome.html" for "http://tf2r.com/welcome.html"
103
+ link_snippet = path.split('/')[1].split('.html')[0] # is 'kabc123' for 'http://tf2r.com/kabc123.html'
104
+
105
+ title = raffle_infos[0].text.split('Title: ')[-1]
106
+ # Lots of info in a single table
107
+ raffle_tds = raffle_infos[3].css('td')
108
+ description = raffle_tds[1].text
109
+
110
+ start_time_string = raffle_tds[9].text
111
+ start_time = DateTime.strptime(start_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
112
+ end_time_string = raffle_tds[11].text
113
+ end_time = DateTime.strptime(end_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
114
+
115
+ win_chance_pre_round = raffle_tds[5].text.to_f / 100 # also #winc
116
+ win_chance = win_chance_pre_round.round(5)
117
+
118
+ entries = raffle_tds[7].text # also #entry
119
+ # Entries looks like "42/123", as "current/max"
120
+ # Split by slash, multiple assignment to array with elements mapped to integers
121
+ # Equivalent to a = b[0].to_i; c = b[1].to_i
122
+ current_entries, max_entries = entries.split('/').map { |x| x.to_i }
123
+
124
+ is_done = end_time <= Time.now || current_entries == max_entries || page.parser.css('.welcome_font').text.include?('No winners') || page.parser.css('.welcome_font').text.include?('Winner(s):')
125
+
126
+ rafflehash = {link_snippet: link_snippet, title: title, description: description, start_time: start_time, end_time: end_time,
127
+ win_chance: win_chance, current_entries: current_entries, max_entries: max_entries, is_done: is_done}
128
+ end
129
+
130
+ def scrape_raffle_for_participants(page)
131
+ participants = []
132
+ participant_divs = page.parser.css('.pentry')
133
+ participant_divs.each do |participant|
134
+ steam_id = participant.css('a')[-1].attributes['href'].text.split('/')[-1].split('.')[0].to_i
135
+ username = participant.text
136
+ colour = participant.css('a')[-1].attributes['style'].text.split('#')[-1].split(';')[0].downcase.chomp
137
+
138
+ participants << {steam_id: steam_id, username: username, colour: colour}
139
+ end
140
+
141
+ participants.uniq.reverse
142
+ end
143
+
144
+ def scrape_raffle(page, portions = :all)
145
+ userhash, rafflehash, participants = {}, {}, []
146
+
147
+ case portions
148
+ when :core
149
+ userhash = scrape_raffle_for_user(page)
150
+ rafflehash = scrape_raffle_for_raffle(page)
151
+ when :participants
152
+ participants = scrape_raffle_for_participants(page)
153
+ else
154
+ userhash = scrape_raffle_for_user(page)
155
+ rafflehash = scrape_raffle_for_raffle(page)
156
+ participants = scrape_raffle_for_participants(page)
157
+ end
158
+
159
+ [userhash, rafflehash, participants]
160
+ end
161
+
162
+ def scrape_user(user_page)
163
+ if user_page.parser.css('.profile_info').empty?
164
+ username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
165
+ steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
166
+ else
167
+ pp user_page.parser.css('.profile_info')
168
+ raffle_infos = user_page.parser.css('.raffle_infomation') # sic
169
+
170
+ steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
171
+ username = user_page.parser.title.split('TF2R Item Raffles - ')[-1]
172
+ avatar_link = raffle_infos[0].css('img')[0].attributes['src'].text
173
+
174
+ posrep = raffle_infos.css('.upvb').text.to_i.to_s
175
+ negrep = raffle_infos.css('.downvb').text.to_i.to_s
176
+
177
+ colour = raffle_infos[1].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
178
+ end
179
+
180
+ userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
181
+ end
182
+
183
+ def scrape_ranks
184
+ # This scrapes the info page for the various ranks that exist
185
+ page = @main.get('http://tf2r.com/info.html')
186
+
187
+ ranks_div = page.parser.css('#ranks')
188
+ divs = ranks_div.css('div')
189
+ rank_divs = []
190
+ divs.each { |div|
191
+ rank_divs << div unless div.attributes['style'].nil? || !(div.attributes['style'].value.include? 'color')
192
+ }
193
+ colours = rank_divs.map {|div| div.attributes['style'].value.split('color:#')[-1].split(';')[0].downcase.chomp }
194
+ end
195
+ end
@@ -0,0 +1,3 @@
1
+ module TF2R
2
+ VERSION = "0.0.1"
3
+ end
data/lib/tf2r.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'mechanize'
2
+ require 'pry'
3
+
4
+ require 'tf2r/scraper'
5
+ require 'tf2r/version'
6
+
7
+ module TF2R
8
+ # Your code goes here...
9
+ end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
+ describe TF2R::Scraper do
4
+ let(:scraper) { TF2R::Scraper.new({}) }
5
+
6
+ it 'is instantiable' do
7
+ expect{
8
+ TF2R::Scraper.new({})
9
+ }.not_to raise_error
10
+ end
11
+
12
+ describe '#new' do
13
+ context 'no options are given' do
14
+ it 'creates an agent with default user agent if none is specified' do
15
+ scraper = TF2R::Scraper.new({})
16
+ expect(scraper.instance_variable_get(:@mech).agent.user_agent).to eql("TF2R::Scraper #{TF2R::VERSION}")
17
+ end
18
+ end
19
+
20
+ context 'a user agent is given' do
21
+ it 'creates an agent with the specified user agent' do
22
+ custom = 'Custom User Agent'
23
+ scraper = TF2R::Scraper.new({user_agent: custom})
24
+ expect(scraper.instance_variable_get(:@mech).agent.user_agent).to eql(custom)
25
+ end
26
+ end
27
+
28
+ context 'a cookies.txt file is given' do
29
+ it 'stores the given cookie' do
30
+ test_filename = File.join(File.dirname(__FILE__), 'test_cookies.txt')
31
+ scraper = TF2R::Scraper.new({cookies_txt: File.new(test_filename)})
32
+ expect(scraper.instance_variable_get(:@mech).cookie_jar.cookies[0].value).to eq("example_value")
33
+ end
34
+ end
35
+ end
36
+
37
+ describe '#fetch' do
38
+ it 'returns a Mechanize::Page' do
39
+ expect(scraper.fetch('http://google.com')).to be_a(Mechanize::Page)
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,5 @@
1
+ require 'tf2r'
2
+
3
+ RSpec.configure do |config|
4
+ config.order = 'random'
5
+ end
@@ -0,0 +1,2 @@
1
+ # This is an example cookies.txt file used for testing.
2
+ example.com FALSE / FALSE 2147483647 example_key example_value
data/tf2r.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'tf2r/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tf2r"
8
+ spec.version = TF2R::VERSION
9
+ spec.authors = ["Justin Kim"]
10
+ spec.email = ["yulli@yulli.org"]
11
+ spec.summary = %q{A utility for use with TF2R.}
12
+ spec.description = %q{This gem provides a utility for performing various tasks with tf2r.com.}
13
+ spec.homepage = "https://github.com/justinkim/tf2r"
14
+ spec.license = "ISC"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0")
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.6"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+
24
+ spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency "pry", "~> 0.10"
26
+
27
+ spec.add_runtime_dependency "mechanize", "~> 2.7"
28
+ end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tf2r
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Justin Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.10'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.10'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mechanize
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.7'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.7'
83
+ description: This gem provides a utility for performing various tasks with tf2r.com.
84
+ email:
85
+ - yulli@yulli.org
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".ruby-gemset"
92
+ - ".ruby-version"
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE
96
+ - README.md
97
+ - Rakefile
98
+ - lib/tf2r.rb
99
+ - lib/tf2r/scraper.rb
100
+ - lib/tf2r/version.rb
101
+ - spec/scraper_spec.rb
102
+ - spec/spec_helper.rb
103
+ - spec/test_cookies.txt
104
+ - tf2r.gemspec
105
+ homepage: https://github.com/justinkim/tf2r
106
+ licenses:
107
+ - ISC
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: A utility for use with TF2R.
129
+ test_files:
130
+ - spec/scraper_spec.rb
131
+ - spec/spec_helper.rb
132
+ - spec/test_cookies.txt