tf2r 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 143e9f53a0a365e8bf98ac186a8b81acc7a3f5a0
4
+ data.tar.gz: f417eab178af88208690c7f257389ae84bf49cf3
5
+ SHA512:
6
+ metadata.gz: 3030e85ac97ab0a5726f8b7aa4f5b0e03bfacd540fcdc581d3f92428432580c50ad9cb9c7975031e68f0fd24e35816258283ca4329bc32697e1c4dffd7082776
7
+ data.tar.gz: 1cf2a0eeae95a3d7b83e248039e706ccbdb96c1cd59421550433a5c5848eff185abc65b5afece44c0254ab1781239485d36b2bf08860b5de73d02a3a22e17976
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.bundle
10
+ *.so
11
+ *.o
12
+ *.a
13
+ mkmf.log
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ tf2r_scraper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.1.2
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tf2r.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,64 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ tf2r (0.0.1)
5
+ mechanize (~> 2.7)
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ coderay (1.1.0)
11
+ diff-lcs (1.2.5)
12
+ domain_name (0.5.19)
13
+ unf (>= 0.0.5, < 1.0.0)
14
+ http-cookie (1.0.2)
15
+ domain_name (~> 0.5)
16
+ mechanize (2.7.3)
17
+ domain_name (~> 0.5, >= 0.5.1)
18
+ http-cookie (~> 1.0)
19
+ mime-types (~> 2.0)
20
+ net-http-digest_auth (~> 1.1, >= 1.1.1)
21
+ net-http-persistent (~> 2.5, >= 2.5.2)
22
+ nokogiri (~> 1.4)
23
+ ntlm-http (~> 0.1, >= 0.1.1)
24
+ webrobots (>= 0.0.9, < 0.2)
25
+ method_source (0.8.2)
26
+ mime-types (2.3)
27
+ mini_portile (0.6.0)
28
+ net-http-digest_auth (1.4)
29
+ net-http-persistent (2.9.4)
30
+ nokogiri (1.6.3.1)
31
+ mini_portile (= 0.6.0)
32
+ ntlm-http (0.1.1)
33
+ pry (0.10.0)
34
+ coderay (~> 1.1.0)
35
+ method_source (~> 0.8.1)
36
+ slop (~> 3.4)
37
+ rake (10.3.2)
38
+ rspec (3.0.0)
39
+ rspec-core (~> 3.0.0)
40
+ rspec-expectations (~> 3.0.0)
41
+ rspec-mocks (~> 3.0.0)
42
+ rspec-core (3.0.3)
43
+ rspec-support (~> 3.0.0)
44
+ rspec-expectations (3.0.3)
45
+ diff-lcs (>= 1.2.0, < 2.0)
46
+ rspec-support (~> 3.0.0)
47
+ rspec-mocks (3.0.3)
48
+ rspec-support (~> 3.0.0)
49
+ rspec-support (3.0.3)
50
+ slop (3.6.0)
51
+ unf (0.1.4)
52
+ unf_ext
53
+ unf_ext (0.0.6)
54
+ webrobots (0.1.1)
55
+
56
+ PLATFORMS
57
+ ruby
58
+
59
+ DEPENDENCIES
60
+ bundler (~> 1.6)
61
+ pry (~> 0.10)
62
+ rake (~> 10.0)
63
+ rspec (~> 3.0)
64
+ tf2r!
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2014, Justin Kim <yulli@yulli.org>
2
+
3
+ Permission to use, copy, modify, and/or distribute this software for any
4
+ purpose with or without fee is hereby granted, provided that the above
5
+ copyright notice and this permission notice appear in all copies.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8
+ WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9
+ MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10
+ ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11
+ WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12
+ ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13
+ OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # TF2R [![Code Climate](https://codeclimate.com/github/justinkim/tf2r/badges/gpa.svg)](https://codeclimate.com/github/justinkim/tf2r)
2
+
3
+ This gem provides a `TF2R::Scraper` that has the ability to scrape various pages on [TF2R](http://tf2r.com) into usable data.
4
+
5
+ Yes, this gem is [semantically versioned](http://semver.org/)!
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'tf2r'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install tf2r
22
+
23
+ ## Usage
24
+
25
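+ Build a scraper with `TF2R::Scraper.new(user_agent: 'My Bot', cookies_txt: 'cookies.txt')` (pass `{}` to use the defaults), then call `scraper.fetch('http://tf2r.com/raffles.html')` to get the page from Mechanize.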
26
+
27
+ ## Contributing
28
+
29
+ 1. Fork it ( https://github.com/justinkim/tf2r/fork )
30
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
31
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
32
+ 4. Push to the branch (`git push origin my-new-feature`)
33
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+
@@ -0,0 +1,195 @@
1
+ module TF2R
2
+ class Scraper
3
+ def initialize(options)
4
+ @mech = Mechanize.new { |mech|
5
+ mech.user_agent = options[:user_agent] || "TF2R::Scraper #{VERSION}"
6
+ }
7
+
8
+ @mech.cookie_jar.load(options[:cookies_txt], :cookiestxt) if options[:cookies_txt]
9
+ end
10
+
11
+ def fetch(url)
12
+ @mech.get(url)
13
+ end
14
+ end
15
+ end
16
+
17
+ __END__
18
+
19
+ # This is the old Scraper from NervyPipe.
20
+ class Scraper
21
+ def initialize(cookies_txt_path)
22
+ @cookies_txt_path = cookies_txt_path
23
+
24
+ @main = Mechanize.new { |agent|
25
+ # the User-Agent field in headers
26
+ agent.user_agent = 'Jenna Bot'
27
+ }
28
+
29
+ auth_cookies(@main)
30
+ end
31
+
32
+ def auth_cookies(mech)
33
+ # Before anything, load our auth cookies into the cookie jar
34
+ # This requires a Netscape-style cookies.txt to be in the working dir
35
+ #
36
+ # cookies.txt must include at least a valid "session" cookie from tf2r.com
37
+ mech.cookie_jar.load_cookiestxt(@cookies_txt_path)
38
+ end
39
+
40
+ # Simply return the Mechanize::Page for a url
41
+ def fetch(url)
42
+ @main.get(url)
43
+ end
44
+
45
+ def run(type)
46
+ case type
47
+ when :raffle
48
+ scrape_raffle(@main.get 'http://tf2r.com/kblf84f.html')
49
+ when :user
50
+ scrape_user(@main.get 'http://tf2r.com/user/76561198061719848.html')
51
+ when :main
52
+ scrape_main_page
53
+ when :ranks
54
+ scrape_ranks
55
+ end
56
+ end
57
+
58
+ def scrape_main_page
59
+ page = @main.get('http://tf2r.com/raffles.html')
60
+
61
+ # This regex matches all Mechanize::Page::Links on the main raffles page that are actual raffles
62
+ raffle_mech_links = page.links_with(href: /tf2r\.com\/k/)
63
+
64
+ # an array of strings, which are raffle links
65
+ raffle_links = raffle_mech_links.map { |x| x.uri.to_s }
66
+
67
+ # the array should have raffles from bottom-to-top, old-to-new
68
+ raffle_links.reverse!
69
+ end
70
+
71
+ def scrape_raffle_for_user(page)
72
+ # This is an array of all things Reag was nice enough to class "raffle_infomation"
73
+ # Reag made a typo, so the class really is "raffle_infomation"
74
+ raffle_infos = page.parser.css('.raffle_infomation')
75
+
76
+ # User information
77
+ steam_id = raffle_infos[2].css('a')[0].attributes['href'].text.split('/')[-1].split('.')[0].to_i
78
+ username = raffle_infos[2].css('a').text
79
+ avatar_link = raffle_infos[1].css('a')[0].css('img')[0].attributes['src'].text
80
+
81
+ # posrep will be "0" (nil.to_i.to_s) if the Scraper's user has already voted on a user's rep in the raffle
82
+ posrepa = raffle_infos.css('.upvb').text.split
83
+ posrepa.delete('+')
84
+ posrep = posrepa[-1].to_i.to_s
85
+
86
+ negrepa = raffle_infos.css('.downvb').text.split
87
+ negrepa.delete('+')
88
+ negrep = negrepa[-1].to_i.to_s
89
+
90
+ colour = raffle_infos[2].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
91
+ # Hash describing the creator of the raffle, built from the values scraped above
92
+ userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
93
+ end
94
+
95
+ def scrape_raffle_for_raffle(page)
96
+ # This is an array of all things Reag was nice enough to class "raffle_infomation"
97
+ # Reag made a typo, so the class really is "raffle_infomation"
98
+ raffle_infos = page.parser.css('.raffle_infomation')
99
+
100
+ # Raffle information
101
+ uri = page.uri # is a URI::HTTP
102
+ path = uri.path # is "/welcome.html" for "http://tf2r.com/welcome.html"
103
+ link_snippet = path.split('/')[1].split('.html')[0] # is 'kabc123' for 'http://tf2r.com/kabc123.html'
104
+
105
+ title = raffle_infos[0].text.split('Title: ')[-1]
106
+ # Lots of info in a single table
107
+ raffle_tds = raffle_infos[3].css('td')
108
+ description = raffle_tds[1].text
109
+
110
+ start_time_string = raffle_tds[9].text
111
+ start_time = DateTime.strptime(start_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
112
+ end_time_string = raffle_tds[11].text
113
+ end_time = DateTime.strptime(end_time_string, '%a, %d %b %Y %H:%M:%S %z').to_time
114
+
115
+ win_chance_pre_round = raffle_tds[5].text.to_f / 100 # also #winc
116
+ win_chance = win_chance_pre_round.round(5)
117
+
118
+ entries = raffle_tds[7].text # also #entry
119
+ # Entries looks like "42/123", as "current/max"
120
+ # Split by slash, multiple assignment to array with elements mapped to integers
121
+ # Equivalent to a = b[0].to_i; c = b[1].to_i
122
+ current_entries, max_entries = entries.split('/').map { |x| x.to_i }
123
+
124
+ is_done = end_time <= Time.now || current_entries == max_entries || page.parser.css('.welcome_font').text.include?('No winners') || page.parser.css('.welcome_font').text.include?('Winner(s):')
125
+
126
+ rafflehash = {link_snippet: link_snippet, title: title, description: description, start_time: start_time, end_time: end_time,
127
+ win_chance: win_chance, current_entries: current_entries, max_entries: max_entries, is_done: is_done}
128
+ end
129
+
130
+ def scrape_raffle_for_participants(page)
131
+ participants = []
132
+ participant_divs = page.parser.css('.pentry')
133
+ participant_divs.each do |participant|
134
+ steam_id = participant.css('a')[-1].attributes['href'].text.split('/')[-1].split('.')[0].to_i
135
+ username = participant.text
136
+ colour = participant.css('a')[-1].attributes['style'].text.split('#')[-1].split(';')[0].downcase.chomp
137
+
138
+ participants << {steam_id: steam_id, username: username, colour: colour}
139
+ end
140
+
141
+ participants.uniq.reverse
142
+ end
143
+
144
+ def scrape_raffle(page, portions = :all)
145
+ userhash, rafflehash, participants = {}, {}, []
146
+
147
+ case portions
148
+ when :core
149
+ userhash = scrape_raffle_for_user(page)
150
+ rafflehash = scrape_raffle_for_raffle(page)
151
+ when :participants
152
+ participants = scrape_raffle_for_participants(page)
153
+ else
154
+ userhash = scrape_raffle_for_user(page)
155
+ rafflehash = scrape_raffle_for_raffle(page)
156
+ participants = scrape_raffle_for_participants(page)
157
+ end
158
+
159
+ [userhash, rafflehash, participants]
160
+ end
161
+
162
+ def scrape_user(user_page)
163
+ if user_page.parser.css('.profile_info').empty?
164
+ username, avatar_link, posrep, negrep, colour = nil, nil, nil, nil, nil
165
+ steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
166
+ else
167
+ pp user_page.parser.css('.profile_info')
168
+ raffle_infos = user_page.parser.css('.raffle_infomation') # sic
169
+
170
+ steam_id = user_page.uri.path.split('/')[-1].split('.')[0].to_i
171
+ username = user_page.parser.title.split('TF2R Item Raffles - ')[-1]
172
+ avatar_link = raffle_infos[0].css('img')[0].attributes['src'].text
173
+
174
+ posrep = raffle_infos.css('.upvb').text.to_i.to_s
175
+ negrep = raffle_infos.css('.downvb').text.to_i.to_s
176
+
177
+ colour = raffle_infos[1].css('a')[0].attributes['style'].value.split('#')[-1].split(';')[0].downcase.chomp
178
+ end
179
+
180
+ userhash = {steam_id: steam_id, username: username, avatar_link: avatar_link, posrep: posrep, negrep: negrep, colour: colour}
181
+ end
182
+
183
+ def scrape_ranks
184
+ # This scrapes the info page for the various ranks that exist
185
+ page = @main.get('http://tf2r.com/info.html')
186
+
187
+ ranks_div = page.parser.css('#ranks')
188
+ divs = ranks_div.css('div')
189
+ rank_divs = []
190
+ divs.each { |div|
191
+ rank_divs << div unless div.attributes['style'].nil? || !(div.attributes['style'].value.include? 'color')
192
+ }
193
+ colours = rank_divs.map {|div| div.attributes['style'].value.split('color:#')[-1].split(';')[0].downcase.chomp }
194
+ end
195
+ end
@@ -0,0 +1,3 @@
1
+ module TF2R
2
+ VERSION = "0.0.1" # Gem version; read by tf2r.gemspec and used in the scraper's default user agent
3
+ end
data/lib/tf2r.rb ADDED
@@ -0,0 +1,9 @@
1
+ require 'mechanize'
2
+ require 'pry'
3
+
4
+ require 'tf2r/scraper'
5
+ require 'tf2r/version'
6
+
7
+ module TF2R
8
+ # Your code goes here...
9
+ end
@@ -0,0 +1,42 @@
1
+ require 'spec_helper'
2
+
3
+ describe TF2R::Scraper do
4
+ let(:scraper) { TF2R::Scraper.new({}) }
5
+
6
+ it 'is instantiable' do
7
+ expect{
8
+ TF2R::Scraper.new({})
9
+ }.not_to raise_error
10
+ end
11
+
12
+ describe '#new' do
13
+ context 'no options are given' do
14
+ it 'creates an agent with default user agent if none is specified' do
15
+ scraper = TF2R::Scraper.new({})
16
+ expect(scraper.instance_variable_get(:@mech).agent.user_agent).to eql("TF2R::Scraper #{TF2R::VERSION}")
17
+ end
18
+ end
19
+
20
+ context 'a user agent is given' do
21
+ it 'creates an agent with the specified user agent' do
22
+ custom = 'Custom User Agent'
23
+ scraper = TF2R::Scraper.new({user_agent: custom})
24
+ expect(scraper.instance_variable_get(:@mech).agent.user_agent).to eql(custom)
25
+ end
26
+ end
27
+
28
+ context 'a cookies.txt file is given' do
29
+ it 'stores the given cookie' do
30
+ test_filename = File.join(File.dirname(__FILE__), 'test_cookies.txt')
31
+ scraper = TF2R::Scraper.new({cookies_txt: File.new(test_filename)})
32
+ expect(scraper.instance_variable_get(:@mech).cookie_jar.cookies[0].value).to eq("example_value")
33
+ end
34
+ end
35
+ end
36
+
37
+ describe '#fetch' do
38
+ it 'returns a Mechanize::Page' do
39
+ expect(scraper.fetch('http://google.com')).to be_a(Mechanize::Page)
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,5 @@
1
+ require 'tf2r'
2
+
3
+ RSpec.configure do |config|
4
+ config.order = 'random' # run the examples in random order
5
+ end
@@ -0,0 +1,2 @@
1
+ # This is an example cookies.txt file used for testing.
2
+ example.com FALSE / FALSE 2147483647 example_key example_value
data/tf2r.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__) # absolute path of the lib/ directory next to this gemspec
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # make lib/ requirable so tf2r/version loads below
4
+ require 'tf2r/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "tf2r"
8
+ spec.version = TF2R::VERSION
9
+ spec.authors = ["Justin Kim"]
10
+ spec.email = ["yulli@yulli.org"]
11
+ spec.summary = %q{A utility for use with TF2R.}
12
+ spec.description = %q{This gem provides a utility for performing various tasks with tf2r.com.}
13
+ spec.homepage = "https://github.com/justinkim/tf2r"
14
+ spec.license = "ISC"
15
+
16
+ spec.files = `git ls-files -z`.split("\x0") # output of `git ls-files -z`, split on NUL bytes
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # basenames of listed files under bin/
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) # listed files under test/, spec/ or features/
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.6"
22
+ spec.add_development_dependency "rake", "~> 10.0"
23
+
24
+ spec.add_development_dependency "rspec", "~> 3.0"
25
+ spec.add_development_dependency "pry", "~> 0.10"
26
+
27
+ spec.add_runtime_dependency "mechanize", "~> 2.7" # the only runtime dependency declared here
28
+ end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tf2r
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Justin Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-08-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.6'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.6'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.10'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.10'
69
+ - !ruby/object:Gem::Dependency
70
+ name: mechanize
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.7'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.7'
83
+ description: This gem provides a utility for performing various tasks with tf2r.com.
84
+ email:
85
+ - yulli@yulli.org
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".ruby-gemset"
92
+ - ".ruby-version"
93
+ - Gemfile
94
+ - Gemfile.lock
95
+ - LICENSE
96
+ - README.md
97
+ - Rakefile
98
+ - lib/tf2r.rb
99
+ - lib/tf2r/scraper.rb
100
+ - lib/tf2r/version.rb
101
+ - spec/scraper_spec.rb
102
+ - spec/spec_helper.rb
103
+ - spec/test_cookies.txt
104
+ - tf2r.gemspec
105
+ homepage: https://github.com/justinkim/tf2r
106
+ licenses:
107
+ - ISC
108
+ metadata: {}
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubyforge_project:
125
+ rubygems_version: 2.4.1
126
+ signing_key:
127
+ specification_version: 4
128
+ summary: A utility for use with TF2R.
129
+ test_files:
130
+ - spec/scraper_spec.rb
131
+ - spec/spec_helper.rb
132
+ - spec/test_cookies.txt