spackler 0.9.2.5

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS rather than plain HTTP.
source "https://rubygems.org"

# Specify your gem's dependencies in spackler.gemspec
gemspec
data/README ADDED
@@ -0,0 +1,21 @@
1
+ by: Mark Holton via RedGrind, LLC
2
+
3
+ INSTALLATION
4
+ 1. gem install spackler
5
+
6
+ GETTING STARTED
7
+ 1. Run `bundle install` to ensure you obtain the required dependencies
8
+
9
+ USAGE
10
+ #example class using Spackler gem:
11
+ require 'spackler'
12
+ include Spackler
13
+
14
+ url = 'http://pga.com'
15
+ major = Spackler::Major.new
16
+ url = major.get_urls(2010)[0] #first URL listed for the 2010 majors
17
+
18
+ puts "grabbing URL data from... #{url}"
19
+ players = major.friendly_structure(major.fetch(url))
20
+
21
+ puts players
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ require 'bundler'
2
+ Bundler::GemHelper.install_tasks
data/lib/spackler.rb ADDED
@@ -0,0 +1,459 @@
1
+ # license: copy this code as much as you want to
2
+ # originally created: 10-29-2008
3
+ # published as ruby gem: 12/24/2010
4
+ # purpose: acquire golf tournament scores, and present it in a more usable form (Array of ostruct's)
5
require 'nokogiri'
require 'open-uri'
require 'ostruct'
begin
  require 'iconv' # optional: Iconv left Ruby's standard library in 2.0
rescue LoadError
  # carry on without Iconv
end
9
+
10
+ #monkey patch String to remove any non-ASCII characters from scrapeage
11
# Monkey patch String to remove any non-ASCII characters from scrapeage.
class String
  # Returns a copy of the receiver that contains only code points below 127.
  #
  # When the legacy Iconv library is loaded it is used exactly as before
  # (UTF-8 -> ASCII with //IGNORE//TRANSLIT). Iconv is not part of Ruby's
  # standard library from 2.0 on, so when the constant is missing the
  # conversion falls back to String#encode, which drops non-ASCII characters
  # instead of transliterating them.
  def to_ascii_iconv
    ascii =
      if defined?(::Iconv)
        ::Iconv.new('ASCII//IGNORE//TRANSLIT', 'UTF-8').iconv(self)
      else
        encode('US-ASCII', 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '')
      end

    # Safety net kept from the original: discard every code point >= 127.
    ascii.unpack('U*').select { |cp| cp < 127 }.pack('U*')
  end
end
17
+
18
+ module Spackler
19
# Splits a scraped golfer name into first-name and last-name parts.
#
# The raw string stays available as #full_name. #fname and #lname are derived
# once in the constructor; both remain "" when the cleaned name does not
# consist of exactly two or three whitespace-separated words.
class Player

  SPECIALS = []
  # Surnames that mark a 3-word name as "<two first names> <surname>".
  LAST_ONE_NAMES = ["Olazabal", "Jimenez", "Johnson", "Singh", "Thompson", "Hicks", "Wan"] #for names where last 1 name = lname
  # Closing words (suffixes / surname tails) of two-word last names. Since any
  # 3-word name that does not end in a LAST_ONE_NAMES entry is split this way,
  # the list is informational only.
  LAST_TWO_NAMES = ["V", "IV", "III", "II", "Jr.", "Jr", "Sr.", "Sr", "Jong", "Pelt", "Broeck", "Jonge", "Hed"] #for names where last 2 names = lname

  attr_reader :fname, :lname, :full_name #lname may include spaces to accommodate "Berganio Jr.", "Love III", etc

  # scraped_full_name - name text as scraped; nil is treated as an empty name.
  def initialize(scraped_full_name)
    @full_name = scraped_full_name
    @fname = ""
    @lname = ""
    parse_clean_name
  end

  def translate_crazy_name_char(special_char)
    special_char.strip #really just a stub for now
  end

  # Removes a two-character course tag in parens, e.g. "Davis Love III (PB)",
  # and every comma. Returns a new String.
  def flatten(name)
    name.gsub(/\(\w{2}\)/, "").gsub(/,/, "")
  end

  # Removes the amateur marker "(a)". Returns a new String.
  def clip_am(lname)
    lname.gsub(/\(a\)/, "")
  end

  # Takes the full name and breaks it apart based on some simple rules.
  #
  # The amateur marker is stripped before splitting, so "Dustin Johnson (a)"
  # is handled as a normal two-word name instead of a three-word one.
  def parse_clean_name
    names = clip_am(flatten(@full_name.to_s)).split(" ")

    case names.length
    when 2 #normal
      @fname = names[0]
      @lname = names[1]
      @lname = "Watson" if @lname == "Waston" #correcting pga.com's misspelling
    when 3
      # Only the final word decides the split. The original loop overwrote its
      # result on every pass, so the last word was already the only one that
      # mattered.
      if LAST_ONE_NAMES.include?(names[2])
        @fname = "#{names[0]} #{names[1]}"
        @lname = names[2]
      else
        # jr / III style names and any untrapped 3 part name: the last two
        # words form the last name, in their original order.
        @fname = names[0]
        @lname = "#{names[1]} #{names[2]}"
      end
    end
  end

end
83
+
84
+ class PGA
85
+
86
# Returns the PGA Tour leaderboard URLs known for +year+.
# Only 2008, 2009 and 2010 have entries; every other year gives [].
def get_urls(year)
  tournament_codes =
    case year
    when 2008
      # diff format: r476
      %w(
        r045 r060 r505 r029 r032 r028 r020 r480 r023 r034 r035 r030
        r003 r004 r483 r018 r054 r481 r012 r019 r022 r021 r025 r471
        r472 r013 r041 r047 r464 r482 r475 r010 r457 r007 r005 r027
      )
    when 2009
      %w(
        r016 r006 r002 r003 r004 r005 r007 r457 r473 r475 r009 r020
      )
    when 2010
      %w(
        r032 r016
      )
    else
      []
    end

  tournament_codes.map do |code|
    "http://www.pgatour.com/leaderboards/current/#{code}/alt-1.html"
  end
end
116
+
117
# Scrapes tournament name, dates and course from a PGA Tour leaderboard page.
#
# url - leaderboard URL.
#
# Returns an OpenStruct with +name+, +dates+ and +course+. Apostrophes are
# removed from name and course. A field is nil when neither the markup nor the
# built-in name table supplies it. Prints the scraped name to stdout.
#
# Markup read (sample):
#   <div class="tourTournSubName">Mayakoba Golf Classic at Riviera Maya-Cancun</div>
#   <div class="tourTournNameDates">Thursday Feb 21 - Sunday Feb 24, 2008</div>
#   <div class="tourTournHeadLinks">El Camaleon Golf Club . Playa del Carmen, Quintana Roo, Mexico</div>
def tourney_info(url)
  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it is public.
  page = URI.respond_to?(:open) ? URI.open(url) : open(url)
  begin
    doc = Nokogiri::HTML(page)
  ensure
    page.close
  end

  # Stripped, ASCII-only text of the first node matching +selector+; nil when absent.
  text_at = lambda do |selector|
    node = doc.css(selector).first
    node && node.inner_text.strip.to_ascii_iconv
  end

  # names for leaderboards whose name can't be scraped, keyed by URL
  tourn_misfits = {
    "http://www.pgatour.com/leaderboards/current/r027/alt-1.html" => "The Barclays",
    "http://www.pgatour.com/leaderboards/current/r028/alt-1.html" => "BMW Championship",
    "http://www.pgatour.com/leaderboards/current/r060/alt-1.html" => "The Tour Championship",
    "http://www.pgatour.com/leaderboards/current/r505/alt-1.html" => "Deutsche Bank Championship",
    "http://www.pgatour.com/leaderboards/current/r473/alt-1.html" => "ca Championship"
  }

  tourn = OpenStruct.new
  # name doesn't exist in markup => look it up by URL
  tourn.name = text_at.call('div.tourTournSubName') || tourn_misfits[url]

  dates = text_at.call('div.tourTournNameDates')
  if dates.nil?
    # some leaderboards have a different format: dates and course share one div
    sub_info = text_at.call('div.tourTournSubInfo').to_s.split(' . ')
    tourn.dates = sub_info[0]
    tourn.course = sub_info[1]
  else
    tourn.dates = dates
    tourn.course = text_at.call('div.tourTournHeadLinks')
  end

  # Guarded: either value is nil when the page did not provide it.
  tourn.name = tourn.name.gsub(/'/, '') if tourn.name
  tourn.course = tourn.course.gsub(/'/, '') if tourn.course
  puts "scraped Tourney Name: #{tourn.name}"

  tourn
end
174
+
175
# Downloads a PGA Tour leaderboard and returns its table cells.
#
# url             - leaderboard URL.
# incl_missed_cut - when true, rows of table.altleaderboard2 are appended
#                   after the rows of table.altleaderboard.
#
# Returns an Array with one entry per <tr>; each entry is an Array of the
# row's <td> texts (stripped, ASCII-only). Rows without <td> give [].
def fetch(url, incl_missed_cut=false)
  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it is public.
  page = URI.respond_to?(:open) ? URI.open(url) : open(url)
  begin
    doc = Nokogiri::HTML(page)
  ensure
    page.close
  end

  selectors = ['table.altleaderboard'] #made cut
  # The CSS selector already restricts the class. The former extra check
  # compared a Nokogiri attribute object with a String, which is never equal,
  # so missed-cut rows were silently dropped.
  selectors << 'table.altleaderboard2' if incl_missed_cut

  player_data = []
  selectors.each do |selector|
    doc.css(selector).each do |table|
      table.css('tr').each do |row|
        player_data << row.css('td').map { |cel| cel.inner_text.strip.to_ascii_iconv }
      end
    end
  end

  player_data
end
213
+
214
# Converts raw leaderboard rows into an Array of OpenStructs.
#
# player_data - Array of cell Arrays. Empty rows and rows whose first cell is
#               "Pos" are skipped.
#
# Each struct carries money, pos, start, name, fname, lname, today, thru,
# to_par, r1..r4 and total, taken from cells 0..11 in that column order.
def friendly_structure player_data
  leading = [[:money, 0], [:pos, 1], [:start, 2], [:name, 3]]
  trailing = [[:today, 4], [:thru, 5], [:to_par, 6], [:r1, 7],
              [:r2, 8], [:r3, 9], [:r4, 10], [:total, 11]]

  usable_rows = player_data.reject { |cells| cells.length == 0 || cells[0] == "Pos" }

  usable_rows.map do |cells|
    entry = OpenStruct.new
    leading.each { |field, index| entry.send("#{field}=", cells[index]) }

    # the parsed name parts sit between the name and the score columns
    parsed = Player.new(entry.name)
    entry.fname = parsed.fname
    entry.lname = parsed.lname

    trailing.each { |field, index| entry.send("#{field}=", cells[index]) }
    entry
  end
end
241
+
242
+ end #end class PGA
243
+
244
+ class Euro
245
+
246
# Returns the European Tour scoring URLs known for +year+.
# Only 2008 has entries; every other year gives [].
#
# The URLs are written out already percent-encoded ("{" => %7B, "}" => %7D).
# That is exactly what URI.escape produced for this template, and it avoids
# the dependency on URI.escape, which no longer exists in Ruby 3.0+.
def get_urls(year)
  return [] unless year == 2008

  # Euro Tour links
  # not working: 2008020 2008026 2008086' in name:
  %w(
    2008091 2008093 2008094 2008096 2008098 2008002 2008004 2008006 2008008 2008014
    2008016 2008018 2008024 2008028 2008032 2008034 2008036 2008038
    2008040 2008042 2008044 2008046 2008050 2008052 2008054 2008056 2008062 2008068
    2008070 2008072 2008074 2008076 2008078 2008083 2008084 2008088
  ).map { |t|
    # NOTE(review): ieventno is the same fixed value for every event, as in
    # the original - confirm that this is intended.
    "http://scores.europeantour.com/default.sps?pagegid=%7B9FFD4839-08EC-4F90-85A2-10F94D42CDB2%7D&eventid=#{t}&ieventno=2008088&infosid=2"
  }
end
268
+
269
# Scrapes tournament name, course, dates and location from a European Tour
# scoring page.
#
# url - scoring page URL.
#
# Returns an OpenStruct with +name+, +course+, +dates+ and +local+; a field is
# nil when its element is missing from the page.
#
# Markup read (sample):
#   <div id = "tournHeaderDiv">Commercialbank Qatar Masters presented by Dolphin Energy</div>
#   <div id = "tournVenue">Doha G.C.</div>
#   <div id = "tournLocal">Doha, Qatar</div>
#   <div id = "tournHeaderDate">24 Jan 2008 - 27 Jan 2008 </div>
def tourney_info(url)
  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it is public.
  page = URI.respond_to?(:open) ? URI.open(url) : open(url)
  begin
    doc = Nokogiri::HTML(page)
  ensure
    page.close
  end

  # Stripped, ASCII-only text of the first node matching +selector+; nil when absent.
  text_at = lambda do |selector|
    node = doc.css(selector).first
    node && node.inner_text.strip.to_ascii_iconv
  end

  tourn = OpenStruct.new
  tourn.name = text_at.call('div#tournHeaderDiv')
  tourn.course = text_at.call('div#tournVenue')
  tourn.dates = text_at.call('div#tournHeaderDate')
  tourn.local = text_at.call('div#tournLocal')

  tourn
end
287
+
288
# Downloads a European Tour scoring page and returns its table cells.
#
# url             - scoring page URL.
# incl_missed_cut - accepted for signature parity with the PGA fetch; not read
#                   here because the table holds made-cut and missed-cut rows.
#
# Returns an Array with one entry per kept <tr> of the first table inside
# div#scoresBoard2; each entry is an Array of <td> texts (stripped,
# ASCII-only). Returns [] when the page has no such table.
def fetch(url, incl_missed_cut=false)
  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it is public.
  page = URI.respond_to?(:open) ? URI.open(url) : open(url)
  begin
    doc = Nokogiri::HTML(page)
  ensure
    page.close
  end

  table = doc.css('div#scoresBoard2 table').first
  return [] unless table

  #made cut and missed cut
  player_data = table.css('tr').map do |row|
    row.css('td').map { |cel| cel.inner_text.strip.to_ascii_iconv }
  end

  # Same trimming, in the same order, as the original pop/reverse sequence:
  # three rows off the end, one off the front, two more off the end.
  # NOTE(review): presumably header/footer rows - confirm against the markup.
  player_data.pop(3)
  player_data.shift
  player_data.pop(2)

  player_data
end
312
+
313
# Converts raw European Tour rows into an Array of OpenStructs.
#
# player_data - Array of cell Arrays. Empty rows and rows whose second cell is
#               "Pos" are skipped.
#
# Cells used: 0 start, 1 pos, 2 name, 4 thru, 5 to_par, 6..9 r1..r4 (cell 3 is
# not read). +total+ is the sum of the four rounds' integer values, as a String.
def friendly_structure player_data
  player_data.inject([]) do |players, cells|
    if cells.length > 0 && cells[1] != "Pos"
      entry = OpenStruct.new
      entry.start = cells[0]
      entry.pos = cells[1]
      entry.name = cells[2]

      parsed = Player.new(entry.name)
      entry.fname = parsed.fname
      entry.lname = parsed.lname

      entry.thru = cells[4]
      entry.to_par = cells[5]
      entry.r1, entry.r2, entry.r3, entry.r4 = cells[6], cells[7], cells[8], cells[9]

      strokes = [entry.r1, entry.r2, entry.r3, entry.r4].inject(0) { |sum, round| sum + round.to_i }
      entry.total = strokes.to_s

      players << entry
    end
    players
  end
end
339
+ end
340
+
341
+
342
# Empty placeholder class: it defines no methods of its own.
class Nationwide; end
344
+
345
+
346
+ class Major
347
+
348
# Returns four scoring URLs for +year+, one entry per major in the order
# masters, usopen, british, pgachampionship. Unknown years give [].
#
# Only 2008 has a distinct URL per major. For 2009 and 2010 all four entries
# are the same pga.com URL (pgachampionship and openchampionship respectively).
def get_urls(year)
  majors = %w( masters usopen british pgachampionship )

  case year
  when 2008
    majors.map { |slug| "http://www.majorschampionships.com/#{slug}/2008/scoring/index.html" }
  when 2009
    majors.map { "http://www.pga.com/pgachampionship/2009/scoring/index.cfm" }
  when 2010
    majors.map { "http://www.pga.com/openchampionship/2010/scoring/index.cfm" }
  else
    []
  end
end
370
+
371
# Returns fixed tournament details for a major.
#
# url        - kept for signature compatibility with the other tours. Nothing
#              is read from it: the page used to be downloaded and parsed here
#              but the result was never used, so that round-trip is gone.
# major_name - value stored as the tournament name.
#
# Returns an OpenStruct with +name+, +dates+ and +course+. Dates and course
# are hard-coded to the 2009 Masters values.
def tourney_info(url, major_name="The Masters")
  tourn = OpenStruct.new

  # this totally sux, just getting it ready for this week, have to refactor a bunch of this later
  tourn.name = major_name
  tourn.dates = "April 9 - 12, 2009"
  tourn.course = "Augusta National Golf Club, Augusta, GA"

  tourn
end
382
+
383
# Downloads a majors scoring page and returns its leaderboard cells.
#
# url - scoring page URL.
#
# Returns an Array with one entry per <tr> of every table.leaderMain, minus
# the first two entries. A row with more than 9 <td> cells becomes an Array of
# cell texts (stripped, ASCII-only); any other row (ads, the 'missed cut'
# divider with colspan, etc) becomes [].
def fetch(url)
  # Kernel#open no longer opens URLs on Ruby 3.0+; use URI.open where it is public.
  page = URI.respond_to?(:open) ? URI.open(url) : open(url)
  begin
    doc = Nokogiri::HTML(page)
  ensure
    page.close
  end

  player_data = []

  #made cut
  doc.css('table.leaderMain').each do |table|
    table.css('tr').each do |row|
      tds = row.css('td')
      player_data << (tds.length > 9 ? tds.map { |cel| cel.inner_text.strip.to_ascii_iconv } : [])
    end
  end

  # Drop the first two entries, as the original reverse!/pop/pop/reverse! did.
  player_data.shift(2)

  player_data
end
416
+
417
# Converts raw majors rows into an Array of OpenStructs.
#
# player_data - Array of cell Arrays. Empty rows and rows whose first cell is
#               "Pos" are skipped.
#
# Cells 0..10 map to pos, mo, name, to_par, thru, today, r1..r4 and total;
# fname and lname are added from the parsed name. Rows without a name (nil or
# "") are skipped. The old guard joined its two tests with ||, so it was
# always true and a row with no name cell reached Player.new(nil).
#
# Every field is still echoed to stdout as "<field>: <value>", exactly as before.
def friendly_structure player_data
  columns = [[:pos, 0], [:mo, 1], [:name, 2], [:to_par, 3], [:thru, 4], [:today, 5],
             [:r1, 6], [:r2, 7], [:r3, 8], [:r4, 9], [:total, 10]]

  players = []
  player_data.each do |p|
    next unless p.length > 0 && p[0] != "Pos"

    playa = OpenStruct.new
    # extract data from the cells, tracing each value as it is stored
    columns.each do |field, index|
      playa.send("#{field}=", p[index])
      puts "#{field}: #{playa.send(field)}"
    end

    next if playa.name.nil? || playa.name == ""

    this_player = Player.new(playa.name)
    playa.fname = this_player.fname
    playa.lname = this_player.lname
    players << playa
  end

  players
end #friendly_structure
456
+
457
+ end #end class Major
458
+
459
+ end # end Spackler
@@ -0,0 +1,3 @@
1
# Gem version string; spackler.gemspec reads this constant.
module Spackler
  VERSION = '0.9.2.5'
end
data/spackler.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "spackler/version"
4
+
5
# Gem specification for spackler.
Gem::Specification.new do |s|
  s.name        = "spackler"
  s.version     = Spackler::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Mark Holton (RedGrind, LLC)"]
  s.email       = ["holtonma@gmail.com"]
  s.homepage    = "https://github.com/holtonma/spackler"
  s.summary     = %q{Obtain all data from PGA Tour, European Tour, and Majors in a friendly
    output format}
  s.description = <<-EOF
    'The spackler gem enables you to very easily obtain data on all golf
    tournament scores throughout the web. See README for more details'
  EOF

  s.rubyforge_project = "spackler"

  # lib/spackler.rb requires nokogiri when it is loaded, so the gem needs it at
  # runtime. Declared as a development dependency it was not installed for
  # users of the gem.
  s.add_dependency %q<nokogiri>, [">= 1.3"]

  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spackler
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 9
8
+ - 2
9
+ - 5
10
+ version: 0.9.2.5
11
+ platform: ruby
12
+ authors:
13
+ - Mark Holton (RedGrind, LLC)
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-24 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 1
31
+ - 3
32
+ version: "1.3"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: " 'The spackler gem enables you to very easily obtain data on all golf \n tournament scores throughout the web. See README for more details'\n"
36
+ email:
37
+ - holtonma@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - README
48
+ - Rakefile
49
+ - lib/spackler.rb
50
+ - lib/spackler/version.rb
51
+ - spackler.gemspec
52
+ has_rdoc: true
53
+ homepage: https://github.com/holtonma/spackler
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ requirements: []
78
+
79
+ rubyforge_project: spackler
80
+ rubygems_version: 1.3.7
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Obtain all data from PGA Tour, European Tour, and Majors in a friendly output format
84
+ test_files: []
85
+