spackler 0.9.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,3 @@
1
+ pkg/*
2
+ *.gem
3
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; a plain-http source can be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in spackler.gemspec
gemspec
data/README ADDED
@@ -0,0 +1,21 @@
1
+ by: Mark Holton via RedGrind, LLC
2
+
3
+ INSTALLATION
4
+ 1. gem install spackler
5
+
6
+ GETTING STARTED
7
+ 1. Run `bundle install` to ensure you obtain the required dependencies
8
+
9
+ USAGE
10
+ #example class using Spackler gem:
11
+ require 'spackler'
12
+ include Spackler
13
+
14
+ url = 'http://pga.com'
15
+ major = Spackler::Major.new
16
+ url = major.get_urls(2010)[0] # first 2010 URL (get_urls currently returns the Open Championship scoring page for every 2010 entry)
17
+
18
+ puts "grabbing URL data from... #{url}"
19
+ players = major.friendly_structure(major.fetch(url))
20
+
21
+ puts players
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
# Load Bundler and register the rake tasks its GemHelper provides for this gem.
require 'bundler'
Bundler::GemHelper.install_tasks
data/lib/spackler.rb ADDED
@@ -0,0 +1,459 @@
1
+ # license: copy this code as much as you want to
2
+ # originally created: 10-29-2008
3
+ # published as ruby gem: 12/24/2010
4
+ # purpose: acquire golf tournament scores and present them in a more usable form (an Array of OpenStructs)
5
+ require 'nokogiri'
6
+ require 'open-uri'
7
+ require 'ostruct'
8
+ require 'iconv'
9
+
10
+ #monkey patch String to remove any non-ASCII characters from scrapeage
11
# Core extension used on every scraped cell: reduces a string to plain ASCII.
class String
  # Returns a copy of the receiver that contains only characters whose code
  # point is below 127 (so non-ASCII characters and DEL are removed).
  #
  # When the Iconv library is loaded it is used exactly as before
  # (UTF-8 -> 'ASCII//IGNORE//TRANSLIT'). Iconv was removed from the standard
  # library in Ruby 2.0, so when it is not available the conversion falls back
  # to String#encode, which drops every character that has no ASCII
  # representation instead of raising NameError.
  def to_ascii_iconv
    ascii =
      if defined?(::Iconv)
        ::Iconv.new('ASCII//IGNORE//TRANSLIT', 'UTF-8').iconv(self)
      else
        encode('US-ASCII', :invalid => :replace, :undef => :replace, :replace => '')
      end

    # Final safety net shared by both paths: keep code points below 127 only.
    ascii.unpack('U*').select { |cp| cp < 127 }.pack('U*')
  end
end
17
+
18
+ module Spackler
19
# Splits a scraped golfer name into a first name and a last name.
#
# The last name may contain spaces so that suffixes and compound surnames stay
# together ("Love III", "Van Pelt"). The split is rule based:
#
# * the course marker "(XX)" , commas and the amateur marker "(a)" are removed
#   before the name is tokenised;
# * two tokens   -> first / last;
# * three tokens -> if the final token is listed in LAST_ONE_NAMES the first
#   two tokens form the first name, otherwise the last two tokens form the
#   last name (this covers LAST_TWO_NAMES and unknown names alike);
# * one token    -> last name only;
# * four or more -> first token is the first name, the rest is the last name.
class Player

  SPECIALS = []
  # Final tokens that are a complete surname on their own in a three-part name.
  LAST_ONE_NAMES = ["Olazabal", "Jimenez", "Johnson", "Singh", "Thompson", "Hicks", "Wan"]
  # Final tokens that belong together with the token before them
  # (generational suffixes and the tail of compound surnames).
  LAST_TWO_NAMES = ["V", "IV", "III", "II", "Jr.", "Jr", "Sr.", "Sr", "Jong", "Pelt", "Broeck", "Jonge", "Hed"]

  # fname / lname are the parsed parts; full_name is the string exactly as
  # it was passed to the constructor.
  attr_reader :fname, :lname, :full_name

  # scraped_full_name - the player name as it appeared in the leaderboard cell.
  def initialize(scraped_full_name)
    @full_name = scraped_full_name
    @fname = ""
    @lname = ""
    parse_clean_name
  end

  # Stub kept for interface compatibility: currently only strips whitespace.
  def translate_crazy_name_char(special_char)
    special_char.strip
  end

  # Removes a two-character parenthesised marker such as "(PB)" and all commas.
  def flatten(name)
    name.gsub(/\(\w{2}\)/, "").gsub(/,/, "")
  end

  # Removes the amateur marker "(a)".
  def clip_am(lname)
    lname.gsub(/\(a\)/, "")
  end

  # Fills @fname and @lname from @full_name using the rules described on the
  # class. A nil name is treated like an empty one.
  def parse_clean_name
    # Markers are stripped from the whole string first so that a
    # space-separated "(a)" is never counted as a name part.
    names = clip_am(flatten(@full_name.to_s)).split(" ")

    case names.length
    when 0
      # nothing usable: both parts stay ""
    when 1
      @lname = names[0]
    when 2
      @fname, @lname = names
      @lname = "Watson" if @lname == "Waston" # correcting pga.com's misspelling
    when 3
      # The decision is made once, on the final token. (The previous
      # per-token loop overwrote the result on every pass, so the final
      # token already decided the outcome.)
      if LAST_ONE_NAMES.include?(names[2])
        @fname = names[0] + " " + names[1]
        @lname = names[2]
      else
        # LAST_TWO_NAMES and unrecognised names: keep the last two tokens
        # together, in their original order.
        @fname = names[0]
        @lname = names[1] + " " + names[2]
      end
    else
      @fname = names[0]
      @lname = names[1..-1].join(" ")
    end
  end

end
83
+
84
# Scraper for the PGA Tour "alt-1" leaderboard pages on pgatour.com.
class PGA

  # Leaderboard page template; %s is a tournament code such as "r045".
  LEADERBOARD_URL = "http://www.pgatour.com/leaderboards/current/%s/alt-1.html"

  # Tournament codes known for each season (seasons not listed have none).
  TOURNAMENT_CODES = [
    [2008, %w(
      r045 r060 r505 r029 r032 r028 r020 r480 r023 r034 r035 r030
      r003 r004 r483 r018 r054 r481 r012 r019 r022 r021 r025 r471
      r472 r013 r041 r047 r464 r482 r475 r010 r457 r007 r005 r027
    )],
    [2009, %w(
      r016 r006 r002 r003 r004 r005 r007 r457 r473 r475 r009 r020
    )],
    [2010, %w(
      r032 r016
    )]
  ]

  # Tournament names for leaderboards whose markup has no
  # div.tourTournSubName element, keyed by tournament code.
  MISFIT_NAMES = {
    "r027" => "The Barclays",
    "r028" => "BMW Championship",
    "r060" => "The Tour Championship",
    "r505" => "Deutsche Bank Championship",
    "r473" => "ca Championship"
  }

  # Returns the leaderboard URLs known for +year+ (an Array of Strings),
  # or an empty Array for a season without any.
  def get_urls(year)
    _season, codes = TOURNAMENT_CODES.detect { |season, _| year == season }
    (codes || []).map { |code| LEADERBOARD_URL % code }
  end

  # Downloads +url+ and returns an OpenStruct with name, dates and course.
  #
  # Markup this method looks for:
  #   <div class="tourTournSubName">tournament name</div>
  #   <div class="tourTournNameDates">dates</div>
  #   <div class="tourTournHeadLinks">course and location</div>
  # Pages without tourTournNameDates are read from div.tourTournSubInfo
  # instead, which holds "dates . course".
  #
  # Apostrophes are removed from name and course. A name or course that cannot
  # be determined is returned as "" (previously this raised NoMethodError).
  def tourney_info(url)
    doc = load_document(url)
    tourn = OpenStruct.new

    name_node = doc.css('div.tourTournSubName').first
    # name missing from the markup -> fall back to the lookup table
    tourn.name = name_node ? node_text(name_node) : misfit_name(url)

    dates_node = doc.css('div.tourTournNameDates').first
    if dates_node
      tourn.dates = node_text(dates_node)
      tourn.course = node_text(doc.css('div.tourTournHeadLinks').first)
    else
      # some leaderboards have a different format
      tourn.dates, tourn.course = node_text(doc.css('div.tourTournSubInfo').first).split(' . ')
    end

    tourn.name = tourn.name.to_s.gsub(/'/, '')
    tourn.course = tourn.course.to_s.gsub(/'/, '')
    puts "scraped Tourney Name: #{tourn.name}"

    tourn
  end

  # Downloads +url+ and returns the leaderboard as an Array of rows, each row
  # being an Array of cleaned cell strings (rows without <td> cells are []).
  #
  # Rows come from table.altleaderboard; when +incl_missed_cut+ is true the
  # rows of table.altleaderboard2 are appended.
  def fetch(url, incl_missed_cut=false)
    doc = load_document(url)

    player_data = table_rows(doc, 'table.altleaderboard')
    # The CSS selector already restricts the match to the right table. The
    # former extra test compared a Nokogiri attribute node with a String
    # (never equal, to my reading of Nokogiri), which would have discarded
    # every missed-cut row.
    player_data.concat(table_rows(doc, 'table.altleaderboard2')) if incl_missed_cut

    player_data
  end

  # Converts rows produced by #fetch into an Array of OpenStructs.
  # Empty rows and the header row (first cell "Pos") are skipped. Cells are
  # mapped by position: money, pos, start, name, today, thru, to_par,
  # r1..r4, total; fname/lname come from Player.
  def friendly_structure(player_data)
    players = []
    player_data.each do |p|
      next unless p.length > 0 && p[0] != "Pos"
      playa = OpenStruct.new
      playa.money = p[0]
      playa.pos = p[1]
      playa.start = p[2]
      playa.name = p[3]
      this_player = Player.new(playa.name)
      playa.fname = this_player.fname
      playa.lname = this_player.lname
      playa.today = p[4]
      playa.thru = p[5]
      playa.to_par = p[6]
      playa.r1 = p[7]
      playa.r2 = p[8]
      playa.r3 = p[9]
      playa.r4 = p[10]
      playa.total = p[11]
      players << playa
    end

    players
  end

  private

  # Downloads and parses +url+. URI#open (from open-uri) is used instead of
  # Kernel#open: it only ever opens URLs, still works on Ruby 3.0+, and the
  # block form closes the handle once the page is parsed.
  def load_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Stripped, ASCII-only text of +node+; "" when the node is missing.
  def node_text(node)
    node ? node.inner_text.strip.to_ascii_iconv : ""
  end

  # Name from MISFIT_NAMES for +url+, or nil when the URL is not listed.
  def misfit_name(url)
    _code, name = MISFIT_NAMES.detect { |code, _| url == LEADERBOARD_URL % code }
    name
  end

  # One Array of cleaned <td> texts per <tr> of every table matching +selector+.
  def table_rows(doc, selector)
    rows = []
    doc.css(selector).each do |table|
      table.css('tr').each do |row|
        rows << row.css('td').map { |cel| cel.inner_text.strip.to_ascii_iconv }
      end
    end
    rows
  end

end #end class PGA
243
+
244
# Scraper for European Tour score pages on scores.europeantour.com.
class Euro

  # Event ids for the 2008 season.
  # not working: 2008020 2008026 2008086 (apostrophe in the name)
  EVENT_IDS_2008 = %w(
    2008091 2008093 2008094 2008096 2008098 2008002 2008004 2008006 2008008 2008014
    2008016 2008018 2008024 2008028 2008032 2008034 2008036 2008038
    2008040 2008042 2008044 2008046 2008050 2008052 2008054 2008056 2008062 2008068
    2008070 2008072 2008074 2008076 2008078 2008083 2008084 2008088
  )

  # Score page template; %s is the event id. The braces around the page GUID
  # are written percent-encoded (%%7B / %%7D in this format string), which is
  # what URI.escape used to produce. URI.escape no longer exists in Ruby 3.0+.
  SCORES_URL = "http://scores.europeantour.com/default.sps?pagegid=%%7B9FFD4839-08EC-4F90-85A2-10F94D42CDB2%%7D&eventid=%s&ieventno=2008088&infosid=2"

  # Returns the score page URLs known for +year+ (only 2008 has any),
  # otherwise an empty Array.
  def get_urls(year)
    return [] unless year == 2008

    EVENT_IDS_2008.map { |event_id| SCORES_URL % event_id }
  end

  # Downloads +url+ and returns an OpenStruct with name, course, dates and
  # local, read from these elements:
  #   <div id="tournHeaderDiv">tournament name</div>
  #   <div id="tournVenue">course</div>
  #   <div id="tournHeaderDate">dates</div>
  #   <div id="tournLocal">location</div>
  # A missing element yields "" (previously this raised NoMethodError).
  def tourney_info(url)
    doc = load_document(url)

    tourn = OpenStruct.new
    tourn.name = node_text(doc.css('div#tournHeaderDiv').first)
    tourn.course = node_text(doc.css('div#tournVenue').first)
    tourn.dates = node_text(doc.css('div#tournHeaderDate').first)
    tourn.local = node_text(doc.css('div#tournLocal').first)

    tourn
  end

  # Downloads +url+ and returns the scoreboard as an Array of rows, each row
  # being an Array of cleaned cell strings. The page lists made-cut and
  # missed-cut players in one table, so +incl_missed_cut+ is accepted only for
  # signature parity with PGA#fetch and has no effect.
  #
  # The first row and the last five rows of the table are discarded (the same
  # rows the former pop/reverse sequence removed). Returns [] when the
  # scoreboard table is not present.
  def fetch(url, incl_missed_cut=false)
    doc = load_document(url)

    table = doc.css('div#scoresBoard2 table')[0]
    return [] unless table

    player_data = table.css('tr').map do |row|
      row.css('td').map { |cel| cel.inner_text.strip.to_ascii_iconv }
    end

    player_data[1...-5] || []
  end

  # Converts rows produced by #fetch into an Array of OpenStructs.
  # Empty rows and the header row (second cell "Pos") are skipped. Cells are
  # mapped by position: start, pos, name, (index 3 unused), thru, to_par,
  # r1..r4; total is the sum of the four rounds as a String and fname/lname
  # come from Player.
  def friendly_structure(player_data)
    players = []
    player_data.each do |p|
      next unless p.length > 0 && p[1] != "Pos"
      playa = OpenStruct.new
      # extract data from European Tour cells:
      playa.start = p[0]
      playa.pos = p[1]
      playa.name = p[2]
      this_player = Player.new(playa.name)
      playa.fname = this_player.fname
      playa.lname = this_player.lname
      playa.thru = p[4]
      playa.to_par = p[5]
      playa.r1 = p[6]
      playa.r2 = p[7]
      playa.r3 = p[8]
      playa.r4 = p[9]
      playa.total = (playa.r1.to_i + playa.r2.to_i + playa.r3.to_i + playa.r4.to_i).to_s

      players << playa
    end

    players
  end

  private

  # Downloads and parses +url+. URI#open (from open-uri) is used instead of
  # Kernel#open: it only ever opens URLs, still works on Ruby 3.0+, and the
  # block form closes the handle once the page is parsed.
  def load_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Stripped, ASCII-only text of +node+; "" when the node is missing.
  def node_text(node)
    node ? node.inner_text.strip.to_ascii_iconv : ""
  end

end
340
+
341
+
342
# Placeholder for a Nationwide Tour scraper; it defines no behaviour yet.
class Nationwide; end
344
+
345
+
346
# Scraper for the scoring pages of the four major championships.
class Major

  # URL path segments of the four majors, in the order get_urls returns them.
  MAJOR_SLUGS = %w( masters usopen british pgachampionship )

  # Leaderboard columns in the order they appear in a scraped row.
  ROW_FIELDS = [:pos, :mo, :name, :to_par, :thru, :today, :r1, :r2, :r3, :r4, :total]

  # Returns four scoring page URLs for +year+, or [] for an unknown year.
  #
  # NOTE: only 2008 yields one URL per major. For 2009 and 2010 every entry is
  # the same page (the 2009 PGA Championship and the 2010 Open Championship
  # respectively); that behaviour is preserved here unchanged.
  def get_urls(year)
    if year == 2008
      MAJOR_SLUGS.map { |slug| "http://www.majorschampionships.com/#{slug}/2008/scoring/index.html" }
    elsif year == 2009
      MAJOR_SLUGS.map { "http://www.pga.com/pgachampionship/2009/scoring/index.cfm" }
    elsif year == 2010
      MAJOR_SLUGS.map { "http://www.pga.com/openchampionship/2010/scoring/index.cfm" }
    else
      []
    end
  end

  # Returns an OpenStruct with name, dates and course.
  #
  # The dates and course are fixed values (the 2009 Masters); only the name
  # can be chosen through +major_name+. +url+ is accepted for signature parity
  # with the other scrapers but is not used: the page was previously
  # downloaded and parsed without anything being read from it, so that
  # network request has been dropped.
  def tourney_info(url, major_name="The Masters")
    tourn = OpenStruct.new
    tourn.name = major_name
    tourn.dates = "April 9 - 12, 2009"
    tourn.course = "Augusta National Golf Club, Augusta, GA"

    tourn
  end

  # Downloads +url+ and returns the leaderboard as an Array of rows, each row
  # being an Array of cleaned cell strings.
  #
  # Rows are taken from every table.leaderMain. A row with nine or fewer <td>
  # cells (ads, the 'missed cut' separator, ...) is kept as an empty Array,
  # which #friendly_structure skips. The first two rows are discarded.
  def fetch(url)
    doc = load_document(url)

    player_data = []
    doc.css('table.leaderMain').each do |table|
      table.css('tr').each do |row|
        tds = row.css('td')
        cells = tds.length > 9 ? tds.map { |cel| cel.inner_text.strip.to_ascii_iconv } : []
        player_data << cells
      end
    end

    player_data.drop(2)
  end

  # Converts rows produced by #fetch into an Array of OpenStructs.
  # Empty rows, the header row (first cell "Pos") and rows whose name cell is
  # nil or empty are skipped. Cells are mapped by position in ROW_FIELDS
  # order; fname/lname come from Player.
  #
  # The per-field trace that used to be printed for every row is now emitted
  # only when Ruby runs in debug mode ($DEBUG).
  def friendly_structure(player_data)
    players = []
    player_data.each do |p|
      next unless p.length > 0 && p[0] != "Pos"

      playa = OpenStruct.new
      ROW_FIELDS.each_with_index { |field, idx| playa.send("#{field}=", p[idx]) }
      trace_row(playa) if $DEBUG

      # The former test (name != nil || name != "") was always true.
      next if playa.name.nil? || playa.name.empty?

      this_player = Player.new(playa.name)
      playa.fname = this_player.fname
      playa.lname = this_player.lname
      players << playa
    end

    players
  end #friendly_structure

  private

  # Downloads and parses +url+. URI#open (from open-uri) is used instead of
  # Kernel#open: it only ever opens URLs, still works on Ruby 3.0+, and the
  # block form closes the handle once the page is parsed.
  def load_document(url)
    URI.parse(url).open { |io| Nokogiri::HTML(io) }
  end

  # Prints one "field: value" line per leaderboard column of +playa+.
  def trace_row(playa)
    ROW_FIELDS.each { |field| puts "#{field}: #{playa.send(field)}" }
  end

end #end class Major
458
+
459
+ end # end Spackler
@@ -0,0 +1,3 @@
1
module Spackler
  # Version of the gem; read by spackler.gemspec. Frozen so the constant's
  # value cannot be modified in place.
  VERSION = "0.9.2.5".freeze
end
data/spackler.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the version constant can be read from the source tree.
$:.push File.expand_path("../lib", __FILE__)
require "spackler/version"

Gem::Specification.new do |s|
  s.name        = "spackler"
  s.version     = Spackler::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Mark Holton (RedGrind, LLC)"]
  s.email       = ["holtonma@gmail.com"]
  s.homepage    = "https://github.com/holtonma/spackler"
  # Kept on one line: the summary previously contained an embedded newline.
  s.summary     = %q{Obtain all data from PGA Tour, European Tour, and Majors in a friendly output format}
  s.description = "The spackler gem enables you to very easily obtain data on all golf " \
                  "tournament scores throughout the web. See README for more details"

  s.rubyforge_project = "spackler"

  # lib/spackler.rb requires nokogiri when it is loaded, so it has to be
  # installed together with the gem (a runtime dependency), not only in
  # development.
  s.add_dependency %q<nokogiri>, [">= 1.3"]

  # The packaged file lists are taken from what git tracks.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: spackler
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 9
8
+ - 2
9
+ - 5
10
+ version: 0.9.2.5
11
+ platform: ruby
12
+ authors:
13
+ - Mark Holton (RedGrind, LLC)
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-24 00:00:00 -08:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ segments:
30
+ - 1
31
+ - 3
32
+ version: "1.3"
33
+ type: :development
34
+ version_requirements: *id001
35
+ description: " 'The spackler gem enables you to very easily obtain data on all golf \n tournament scores throughout the web. See README for more details'\n"
36
+ email:
37
+ - holtonma@gmail.com
38
+ executables: []
39
+
40
+ extensions: []
41
+
42
+ extra_rdoc_files: []
43
+
44
+ files:
45
+ - .gitignore
46
+ - Gemfile
47
+ - README
48
+ - Rakefile
49
+ - lib/spackler.rb
50
+ - lib/spackler/version.rb
51
+ - spackler.gemspec
52
+ has_rdoc: true
53
+ homepage: https://github.com/holtonma/spackler
54
+ licenses: []
55
+
56
+ post_install_message:
57
+ rdoc_options: []
58
+
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ segments:
67
+ - 0
68
+ version: "0"
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ requirements: []
78
+
79
+ rubyforge_project: spackler
80
+ rubygems_version: 1.3.7
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Obtain all data from PGA Tour, European Tour, and Majors in a friendly output format
84
+ test_files: []
85
+