vegas_insider_scraper 0.0.15 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/sports/mlb.rb +10 -10
- data/lib/sports/nba.rb +5 -5
- data/lib/sports/ncaabb.rb +16 -16
- data/lib/sports/ncaafb.rb +69 -65
- data/lib/sports/nfl.rb +5 -5
- data/lib/sports/scraper_league.rb +656 -549
- data/lib/vegas_insider_scraper.rb +11 -6
- metadata +22 -14
- data/lib/sports/nhl.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 538bf0d9d798e8602915f700679ad9541dcad4b6f279cf4fd8a734030d556821
|
4
|
+
data.tar.gz: 4d0d701086e782e36ecc3524145a30189627f1940d4b5c5b6473bc4a1cb167e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4b0a4db84a2ce082e21de78de71c02a51f12c8c1bc4325f60151c9547cdb06ffa1ebe93b42bbae7d5206b433f76284a792d6c17f36585cb8eecde12422f95b4
|
7
|
+
data.tar.gz: b6a1ff071b8d3675cf318468c45fbc4a8806eb750cbd09be8a0656dbb9415b6b4229a978a8cf90febfec68a0c051c20e0d072d754b4db3af840833b09936a597
|
data/lib/sports/mlb.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
|
2
2
|
class MLB < ScraperLeague
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
def initialize
|
5
|
+
@sport_id = 4
|
6
|
+
@sport_name = :mlb
|
7
|
+
super
|
8
|
+
@moneyline_sport = true
|
9
|
+
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
11
|
+
def current_games
|
12
|
+
@current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
|
13
|
+
"http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"])
|
14
|
+
end
|
15
15
|
|
16
16
|
end
|
data/lib/sports/nba.rb
CHANGED
data/lib/sports/ncaabb.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
|
2
2
|
class NCAABB < ScraperLeague
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
def initialize
|
5
|
+
@sport_id = 1
|
6
|
+
@sport_name = 'college-basketball'
|
7
|
+
super
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
10
|
+
# def get_nicknames
|
11
|
+
# start_time = Time.now
|
12
|
+
# num_successes = 0
|
13
|
+
# Team.ncaabb_teams.each_with_index do |team, i|
|
14
|
+
# url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
|
15
|
+
# nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
|
16
|
+
# team.nickname = nickname
|
17
|
+
# team.save
|
18
|
+
# end
|
19
|
+
# Time.now - start_time
|
20
|
+
# end
|
21
21
|
|
22
22
|
end
|
data/lib/sports/ncaafb.rb
CHANGED
@@ -1,80 +1,84 @@
|
|
1
1
|
|
2
2
|
class NCAAFB < ScraperLeague
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
def initialize
|
5
|
+
@sport_id = 0
|
6
|
+
@sport_name = 'college-football'
|
7
|
+
super
|
8
|
+
end
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
# next if team.nickname
|
14
|
-
# url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
|
15
|
-
# nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
|
16
|
-
# team.nickname = nickname
|
17
|
-
# team.save
|
18
|
-
# end
|
19
|
-
# Time.now - start_time
|
20
|
-
# end
|
10
|
+
def teams
|
11
|
+
@teams ||= scrape_teams
|
12
|
+
end
|
21
13
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
14
|
+
# def get_nicknames
|
15
|
+
# start_time = Time.now
|
16
|
+
# Team.where(sport_id: 0).each_with_index do |team, i|
|
17
|
+
# next if team.nickname
|
18
|
+
# url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
|
19
|
+
# nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
|
20
|
+
# team.nickname = nickname
|
21
|
+
# team.save
|
22
|
+
# end
|
23
|
+
# Time.now - start_time
|
24
|
+
# end
|
30
25
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
26
|
+
# def get_locations
|
27
|
+
# start_time = Time.now
|
28
|
+
# Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
|
29
|
+
# team.location = nil
|
30
|
+
# team.save
|
31
|
+
# end
|
32
|
+
# Time.now - start_time
|
33
|
+
# end
|
36
34
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
# end
|
43
|
-
# Time.now - start_time
|
44
|
-
# end
|
35
|
+
# def scrape_custom_team_page_for_location(vegas_identifier, url)
|
36
|
+
# doc = Nokogiri::HTML(open(url))
|
37
|
+
# title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
|
38
|
+
# return title
|
39
|
+
# end
|
45
40
|
|
46
|
-
|
47
|
-
|
48
|
-
|
41
|
+
# def remove_nickname_from_location
|
42
|
+
# start_time = Time.now
|
43
|
+
# Team.where(sport_id: 0).each_with_index do |team, i|
|
44
|
+
# puts team.location
|
45
|
+
# puts team.location.gsub(" #{team.nickname}", '')
|
46
|
+
# end
|
47
|
+
# Time.now - start_time
|
48
|
+
# end
|
49
49
|
|
50
|
-
|
51
|
-
|
50
|
+
# def scrape_fcs_teams
|
51
|
+
# url = 'http://www.vegasinsider.com/college-football/teams/'
|
52
|
+
# doc = Nokogiri::HTML(open(url))
|
52
53
|
|
53
|
-
|
54
|
-
|
55
|
-
# new_conference = row.at_css('td.viSubHeader1')
|
54
|
+
# current_conference = nil
|
55
|
+
# fcs = []
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
# team = row.at_css('a')
|
61
|
-
# if team
|
62
|
-
# team_formatted = {
|
63
|
-
# team_name: team.content,
|
64
|
-
# team_url_id: team_url_parser(team.attribute('href')),
|
65
|
-
# conference: current_conference,
|
66
|
-
# league: sport_id
|
67
|
-
# }
|
68
|
-
# puts team_formatted
|
69
|
-
# fcs.push team_formatted
|
70
|
-
# end
|
71
|
-
# end
|
72
|
-
# end
|
73
|
-
# end
|
57
|
+
# doc.css('.main-content-cell table table table').each_with_index do |col,i|
|
58
|
+
# col.css('tr').each do |row|
|
59
|
+
# new_conference = row.at_css('td.viSubHeader1')
|
74
60
|
|
75
|
-
|
76
|
-
|
61
|
+
# if new_conference
|
62
|
+
# current_conference = new_conference.content
|
63
|
+
# else
|
64
|
+
# team = row.at_css('a')
|
65
|
+
# if team
|
66
|
+
# team_formatted = {
|
67
|
+
# team_name: team.content,
|
68
|
+
# team_url_id: team_url_parser(team.attribute('href')),
|
69
|
+
# conference: current_conference,
|
70
|
+
# league: sport_id
|
71
|
+
# }
|
72
|
+
# puts team_formatted
|
73
|
+
# fcs.push team_formatted
|
74
|
+
# end
|
75
|
+
# end
|
76
|
+
# end
|
77
|
+
# end
|
77
78
|
|
78
|
-
|
79
|
+
# Team.save_teams(fcs)
|
80
|
+
# return true
|
81
|
+
|
82
|
+
# end
|
79
83
|
|
80
84
|
end
|
data/lib/sports/nfl.rb
CHANGED
@@ -3,554 +3,661 @@ require 'open-uri'
|
|
3
3
|
|
4
4
|
class ScraperLeague
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
6
|
+
attr_reader :sport_id
|
7
|
+
attr_reader :sport_name
|
8
|
+
attr_reader :moneyline_sport
|
9
|
+
attr_reader :teams
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@moneyline_sport = false
|
13
|
+
end
|
14
|
+
|
15
|
+
def teams
|
16
|
+
@teams ||= standings
|
17
|
+
end
|
18
|
+
|
19
|
+
def standings
|
20
|
+
@standings ||= scrape_standings
|
21
|
+
end
|
22
|
+
|
23
|
+
# Gets the upcoming/current games for the sport
|
24
|
+
def current_games
|
25
|
+
@current_games ||= get_lines(["http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/","http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"])
|
26
|
+
end
|
27
|
+
|
28
|
+
# Gets all of the schedule and results for each team
|
29
|
+
def team_schedules
|
30
|
+
@team_schedules ||= teams.map { |team|
|
31
|
+
puts " ### GETTING GAMES FOR: #{team[:info][:full_name]}"
|
32
|
+
url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{team[:info][:identifier]}"
|
33
|
+
scrape_team_page(url, team[:info][:identifier])
|
34
|
+
}
|
35
|
+
end
|
36
|
+
|
37
|
+
def live_scores
|
38
|
+
@live_scores = get_live_scores("https://web.archive.org/web/20170704205945/http://www.vegasinsider.com/mlb/scoreboard/")
|
39
|
+
nil
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
def scrape_teams
|
45
|
+
url = "http://www.vegasinsider.com/#{sport_name}/teams/"
|
46
|
+
doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
|
47
|
+
|
48
|
+
doc.css('a').map do |team_link|
|
49
|
+
team = {}
|
50
|
+
team[:info] = format_college_team(team_link, doc)
|
51
|
+
|
52
|
+
row = team_link.parent.parent.previous
|
53
|
+
while !(row.at_css('td') && row.at_css('td').attributes['class'].value.include?('viSubHeader1'))
|
54
|
+
row = row.previous
|
55
|
+
end
|
56
|
+
team[:grouping] = { conference: row.at_css('td').content }
|
57
|
+
team
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
######################################################
|
62
|
+
# Gets the teams and scrapes the records for the teams
|
63
|
+
def scrape_standings
|
64
|
+
standings_teams = []
|
65
|
+
url = "http://www.vegasinsider.com/#{sport_name}/standings/"
|
66
|
+
doc = Nokogiri::HTML(open(url)).at_css('.main-content-cell')
|
67
|
+
teams_doc = Nokogiri::HTML(open(url.gsub('standings','teams'))).at_css('.main-content-cell')
|
68
|
+
|
69
|
+
doc.css(standings_table_class).each do |conference|
|
70
|
+
conference_title = conference.at_css(".viHeaderNorm")
|
71
|
+
next if conference_title.nil?
|
72
|
+
|
73
|
+
table = conference.css('.viBodyBorderNorm table')[standings_table_index]
|
74
|
+
table = conference.css('.viBodyBorderNorm table')[2] if (conference_title.content == 'Conference USA' && sport_name == 'college-football')
|
75
|
+
|
76
|
+
if table
|
77
|
+
table.css('tr').each_with_index do |row, index|
|
78
|
+
next if (row.at_css('.viSubHeader1') != nil || row.at_css('.viSubHeader2') != nil)
|
79
|
+
standings_teams.push(scrape_standings_row(row, conference_division_parser(conference_title.content), teams_doc))
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
standings_teams
|
84
|
+
end
|
85
|
+
|
86
|
+
# Utility method for scraping standings
|
87
|
+
# * gets the standings table class
|
88
|
+
def standings_table_class
|
89
|
+
college_sport? ? '.SLTables1' : 'table'
|
90
|
+
end
|
91
|
+
|
92
|
+
# Utility method for scraping standings
|
93
|
+
# * gets the index of the table
|
94
|
+
def standings_table_index
|
95
|
+
college_sport? ? 1 : 0
|
96
|
+
end
|
97
|
+
|
98
|
+
# Utility method for scraping standings
|
99
|
+
# * gets the standings table class
|
100
|
+
def conference_division_parser(title)
|
101
|
+
if college_sport?
|
102
|
+
return { conference: title, division: nil }
|
103
|
+
else
|
104
|
+
result = /(?<conference>.+) - (?<division>.+)/.match(title)
|
105
|
+
return { conference: result[:conference], division: result[:division] }
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
|
110
|
+
# Utility method for scraping standings
|
111
|
+
# * is a college sport?
|
112
|
+
def college_sport?
|
113
|
+
['college-football','college-basketball'].include?(sport_name)
|
114
|
+
end
|
115
|
+
|
116
|
+
# Utility method for scraping standings
|
117
|
+
# * scrapes a row of the standings, chooses a helper method based on the league
|
118
|
+
def scrape_standings_row(row, grouping, teams_doc)
|
119
|
+
team_shell = { info: {}, record: {} }
|
120
|
+
team = case sport_id
|
121
|
+
when 0,1 then college_standings_row_parser(row, team_shell, teams_doc)
|
122
|
+
when 2 then nfl_standings_row_parser(row, team_shell)
|
123
|
+
when 3,4 then pro_standings_row_parser(row, team_shell)
|
124
|
+
when 5 then hockey_standings_row_parser(row, team_shell)
|
125
|
+
end
|
126
|
+
team[:grouping] = grouping
|
127
|
+
team
|
128
|
+
end
|
129
|
+
|
130
|
+
# Utility method for scraping standings
|
131
|
+
# * scrapes a row of the standings, for COLLEGE sports
|
132
|
+
def college_standings_row_parser(row, team, teams_doc)
|
133
|
+
row.css('td').each_with_index do |cell, cell_index|
|
134
|
+
value = remove_element_whitespace(cell)
|
135
|
+
case cell_index
|
136
|
+
when 0
|
137
|
+
team[:info] = format_college_team(cell.at_css('a'), teams_doc)
|
138
|
+
when 5 then team[:record][:overall_wins] = value.to_i
|
139
|
+
when 6 then team[:record][:overall_losses] = value.to_i
|
140
|
+
when 9 then team[:record][:home_wins] = value.to_i
|
141
|
+
when 10 then team[:record][:home_losses] = value.to_i
|
142
|
+
when 13 then team[:record][:away_wins] = value.to_i
|
143
|
+
when 14 then team[:record][:away_losses] = value.to_i
|
144
|
+
end
|
145
|
+
end
|
146
|
+
return team
|
147
|
+
end
|
148
|
+
|
149
|
+
# Utility method for scraping standings
|
150
|
+
# * scrapes a row of the standings, for NFL
|
151
|
+
def nfl_standings_row_parser(row, team)
|
152
|
+
row.css('td').each_with_index do |cell, cell_index|
|
153
|
+
content = remove_element_whitespace(cell)
|
154
|
+
|
155
|
+
case cell_index
|
156
|
+
when 0 then team[:info] = format_team(cell.at_css('a'))
|
157
|
+
when 1 then team[:record][:overall_wins] = content.to_i
|
158
|
+
when 2 then team[:record][:overall_losses] = content.to_i
|
159
|
+
when 3 then team[:record][:overall_ties] = content.to_i
|
160
|
+
when 7
|
161
|
+
record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
|
162
|
+
team[:record][:home_wins] = record[:wins]
|
163
|
+
team[:record][:home_losses] = record[:losses]
|
164
|
+
team[:record][:home_ties] = record[:ties]
|
165
|
+
when 8
|
166
|
+
record = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
|
167
|
+
team[:record][:away_wins] = record[:wins]
|
168
|
+
team[:record][:away_losses] = record[:losses]
|
169
|
+
team[:record][:away_ties] = record[:ties]
|
170
|
+
end
|
171
|
+
end
|
172
|
+
return team
|
173
|
+
end
|
174
|
+
|
175
|
+
# Utility method for scraping standings
|
176
|
+
# * scrapes a row of the standings, for PRO (MLB)
|
177
|
+
def pro_standings_row_parser(row, team)
|
178
|
+
row.css('td').each_with_index do |cell, cell_index|
|
179
|
+
content = remove_element_whitespace(cell)
|
180
|
+
|
181
|
+
case cell_index
|
182
|
+
when 0 then team[:info] = format_team(cell.at_css('a'))
|
183
|
+
when 1 then team[:record][:overall_wins] = content.to_i
|
184
|
+
when 2 then team[:record][:overall_losses] = content.to_i
|
185
|
+
when 5
|
186
|
+
record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
|
187
|
+
team[:record][:home_wins] = record[:wins]
|
188
|
+
team[:record][:home_losses] = record[:losses]
|
189
|
+
when 6
|
190
|
+
record = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
|
191
|
+
team[:record][:away_wins] = record[:wins]
|
192
|
+
team[:record][:away_losses] = record[:losses]
|
193
|
+
end
|
194
|
+
end
|
195
|
+
return team
|
196
|
+
end
|
197
|
+
|
198
|
+
# Utility method for scraping standings
|
199
|
+
# * scrapes a row of the standings, for NHL
|
200
|
+
def hockey_standings_row_parser(row, team)
|
201
|
+
row.css('td').each_with_index do |cell, cell_index|
|
202
|
+
content = remove_element_whitespace(cell)
|
203
|
+
|
204
|
+
case cell_index
|
205
|
+
when 0 then team[:info] = format_team(cell.at_css('a'))
|
206
|
+
when 1 then team[:record][:overall_wins] = content.to_i
|
207
|
+
when 2 then team[:record][:overall_losses] = content.to_i
|
208
|
+
when 3 then team[:record][:over_time_losses] = content.to_i
|
209
|
+
when 4 then team[:record][:shootout_losses] = content.to_i
|
210
|
+
when 5 then team[:record][:points] = content.to_i
|
211
|
+
when 8
|
212
|
+
record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
|
213
|
+
team[:record][:home_wins] = record[:wins]
|
214
|
+
team[:record][:home_losses] = record[:losses]
|
215
|
+
team[:record][:home_over_time_losses] = record[:ot_losses]
|
216
|
+
team[:record][:home_shootout_losses] = record[:shootout_losses]
|
217
|
+
when 9
|
218
|
+
record = RegularExpressions::NHL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
|
219
|
+
team[:record][:away_wins] = record[:wins]
|
220
|
+
team[:record][:away_losses] = record[:losses]
|
221
|
+
team[:record][:away_over_time_losses] = record[:ot_losses]
|
222
|
+
team[:record][:away_shootout_losses] = record[:shootout_losses]
|
223
|
+
end
|
224
|
+
end
|
225
|
+
return team
|
226
|
+
end
|
227
|
+
|
228
|
+
# Utility method for scraping standings
|
229
|
+
# * formats the team using the URL
|
230
|
+
def format_team(url)
|
231
|
+
full_name = url.content
|
232
|
+
identifier = team_url_parser(url.attribute('href'))
|
233
|
+
nickname = humanize_identifier(identifier)
|
234
|
+
|
235
|
+
{
|
236
|
+
identifier: identifier,
|
237
|
+
nickname: nickname,
|
238
|
+
location: full_name.gsub(" #{nickname}", ''),
|
239
|
+
full_name: full_name,
|
240
|
+
url: url.attribute('href').value
|
241
|
+
}
|
242
|
+
end
|
243
|
+
|
244
|
+
# Utility method for scraping standings
|
245
|
+
# * formats the team using the URL and the Nokogiri document for the teams page
|
246
|
+
def format_college_team(url, teams_doc)
|
247
|
+
full_name = team_page_full_name(teams_doc, url)
|
248
|
+
location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
|
249
|
+
identifier = team_url_parser(url.attribute('href'))
|
250
|
+
nickname = full_name.gsub("#{location} ",'')
|
251
|
+
|
252
|
+
if nickname == full_name
|
253
|
+
nickname = full_name.gsub('&','').gsub("#{humanize_identifier(identifier)}", '').strip
|
254
|
+
end
|
255
|
+
|
256
|
+
if nickname == full_name.gsub('&','').strip
|
257
|
+
nickname_array = nickname.split(' ')
|
258
|
+
nickname = nickname_array.each_slice( (nickname_array.size/2.0).round ).to_a[1].join(' ')
|
259
|
+
nickname = nickname_exceptions(identifier,nickname)
|
260
|
+
end
|
261
|
+
|
262
|
+
return {
|
263
|
+
identifier: identifier,
|
264
|
+
nickname: nickname,
|
265
|
+
location: full_name.gsub(" #{nickname}", ''),
|
266
|
+
full_name: full_name,
|
267
|
+
url: url.attribute('href').value
|
268
|
+
}
|
269
|
+
end
|
270
|
+
|
271
|
+
def humanize_identifier(identifier)
|
272
|
+
identifier.split('-').map { |x| x.capitalize }.join(' ')
|
273
|
+
end
|
274
|
+
|
275
|
+
def nickname_exceptions(identifier,nickname)
|
276
|
+
case identifier
|
277
|
+
when 'california-state-long-beach' then '49ers'
|
278
|
+
when 'texas-am-corpus-christi' then 'Islanders'
|
279
|
+
when 'southern-am' then 'Jaguars'
|
280
|
+
when 'saint-marys-college-california' then 'Gaels'
|
281
|
+
else nickname end
|
282
|
+
end
|
283
|
+
|
284
|
+
# Utility method for scraping standings
|
285
|
+
# * gets the full team name using the teams page
|
286
|
+
def team_page_full_name(doc,url)
|
287
|
+
doc.at_css("a[href='#{url.attribute('href')}']").content
|
288
|
+
end
|
289
|
+
|
290
|
+
##########################################
|
291
|
+
# Gets the current lines for a given sport
|
292
|
+
def get_live_scores(url)
|
293
|
+
doc = Nokogiri::HTML(open(url))
|
294
|
+
|
295
|
+
date = doc.at_css('.ff_txt2 tr:nth-child(2) font')
|
296
|
+
date = Date.strptime(date.content, '%a, %b %d') if date
|
297
|
+
|
298
|
+
games = []
|
299
|
+
|
300
|
+
doc.css('.SLTables4 table > tr').each do |row|
|
301
|
+
|
302
|
+
date_row = row.attribute('valign')
|
303
|
+
|
304
|
+
if date_row
|
305
|
+
date = parse_score_date(row)
|
306
|
+
else
|
307
|
+
|
308
|
+
row.css('.yeallowBg .sportPicksBorder').each do |game|
|
309
|
+
|
310
|
+
result = {}
|
311
|
+
game.css('.tanBg a').each_with_index do |team, i|
|
312
|
+
if i == 0
|
313
|
+
result[:away_team] = team_url_parser(team.attribute('href'))
|
314
|
+
else
|
315
|
+
result[:home_team] = team_url_parser(team.attribute('href'))
|
316
|
+
end
|
317
|
+
end
|
318
|
+
|
319
|
+
game_status = remove_element_whitespace(game.at_css('.sub_title_red'), true)
|
320
|
+
game_status = case
|
321
|
+
when game_status == 'Final Score' then :ended
|
322
|
+
when game_status == 'PPD' then :Postponed
|
323
|
+
when game_status == '' then :Cancelled
|
324
|
+
when game_status.include?('Game Time') then nil
|
325
|
+
else game_status end
|
326
|
+
|
327
|
+
if game_status
|
328
|
+
segment_titles = []
|
329
|
+
game.css('.sportPicksBg td').each_with_index do |col,i|
|
330
|
+
puts remove_element_whitespace(col)
|
331
|
+
next if ['Teams', 'Odds', 'ATS', ''].include?(remove_element_whitespace col)
|
332
|
+
segment_titles.push remove_element_whitespace col
|
333
|
+
end
|
334
|
+
|
335
|
+
away_values = []
|
336
|
+
game.css('.tanBg')[0].css('td').each_with_index do |col,i|
|
337
|
+
next if i < 3
|
338
|
+
away_values.push remove_element_whitespace(col).to_i
|
339
|
+
end
|
340
|
+
away_values.pop
|
341
|
+
|
342
|
+
home_values = []
|
343
|
+
game.css('.tanBg')[1].css('td').each_with_index do |col,i|
|
344
|
+
next if i < 3
|
345
|
+
home_values.push remove_element_whitespace(col).to_i
|
346
|
+
end
|
347
|
+
home_values.pop
|
348
|
+
|
349
|
+
end
|
350
|
+
|
351
|
+
if segment_titles
|
352
|
+
result[:scoring] = segment_titles.each_with_index.map { |s,i|
|
353
|
+
{ period: s, away: away_values[i], home: home_values[i] }
|
354
|
+
}
|
355
|
+
end
|
356
|
+
|
357
|
+
result[:status] = game_status
|
358
|
+
result[:date] = date
|
359
|
+
|
360
|
+
games.push(result)
|
361
|
+
puts result
|
362
|
+
puts "********************"
|
363
|
+
end
|
364
|
+
end
|
365
|
+
|
366
|
+
end
|
367
|
+
return games
|
368
|
+
end
|
369
|
+
|
370
|
+
def parse_score_date(element)
|
371
|
+
str = remove_element_whitespace(element, true).gsub(/Week\s+\d+\s+-\s/,'')
|
372
|
+
Date.strptime(str, '%A %B %d, %Y')
|
373
|
+
end
|
374
|
+
|
375
|
+
##########################################
|
376
|
+
# Gets the current lines for a given sport
|
377
|
+
def get_lines(urls)
|
378
|
+
games = []
|
379
|
+
|
380
|
+
urls.each { |url|
|
381
|
+
is_first_url = games.empty?
|
382
|
+
doc = Nokogiri::HTML(open(url))
|
383
|
+
doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
|
384
|
+
|
385
|
+
game_cell = game_row.at_css('td:first-child')
|
386
|
+
teams = game_cell_parser(game_cell)
|
387
|
+
game = Game.new(home_team: teams[1], away_team: teams[0])
|
388
|
+
|
389
|
+
if game.teams_found?
|
390
|
+
game.update(time: get_game_time(game_cell))
|
391
|
+
game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))
|
392
|
+
is_first_url ? (games.push game) : (game = game.find_equal(games))
|
393
|
+
game.update(vegas_info: get_line(get_odds(game_row)))
|
394
|
+
game.update(vegas_info: get_line(get_odds_inner_html(game_row)))
|
395
|
+
|
396
|
+
elsif is_first_url
|
397
|
+
last_game = games.last
|
398
|
+
if last_game then last_game.update(notes: (last_game.notes ? "#{last_game.notes} / " : '') + game_cell.content) end
|
399
|
+
end
|
400
|
+
end
|
401
|
+
}
|
402
|
+
games
|
403
|
+
end
|
404
|
+
|
405
|
+
# Utility method for scraping current lines
|
406
|
+
# * find the identifier for each team
|
407
|
+
def game_cell_parser(cell)
|
408
|
+
cell.css('b a').map { |team| team_url_parser(team.attribute('href')) }
|
409
|
+
end
|
410
|
+
|
411
|
+
# Utility method for scraping current lines
|
412
|
+
# * getting the time of the game
|
413
|
+
def get_game_time(cell)
|
414
|
+
time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
|
415
|
+
year = ((Date.today.month > time[:mo].to_i) && (Date.today.month - 1 != time[:mo].to_i)) ? Date.today.year + 1 : Date.today.year
|
416
|
+
|
417
|
+
ENV['TZ'] = 'US/Eastern'
|
418
|
+
time = Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
|
419
|
+
ENV['TZ'] = nil
|
420
|
+
time
|
421
|
+
end
|
422
|
+
|
423
|
+
# Utility method for scraping current lines
|
424
|
+
# * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
|
425
|
+
def get_odds(odds_element)
|
426
|
+
(odds_element.at_css('td:nth-child(3) a')&.content || '').gsub(" ","").gsub("½",".5").strip
|
427
|
+
end
|
428
|
+
def get_odds_inner_html(odds_element)
|
429
|
+
((odds_element.at_css('td:nth-child(3) a'))&.inner_html || '').encode('utf-8').gsub(" ","").gsub("½",".5").strip
|
430
|
+
end
|
431
|
+
|
432
|
+
# Utility method for scraping current lines
|
433
|
+
# * parsing the lines for non-moneyline sports
|
434
|
+
def get_line(odds_string)
|
435
|
+
odds_string = odds_string.gsub('PK', '-0')
|
436
|
+
odds = matchdata_to_hash(RegularExpressions::ODDS.match(odds_string)) || {}
|
437
|
+
runlines_odds = matchdata_to_hash(RegularExpressions::RUNLINE_ODDS.match(odds_string)) || {}
|
438
|
+
moneyline_odds = matchdata_to_hash(RegularExpressions::MONEYLINE_ODDS.match(odds_string)) || {}
|
439
|
+
|
440
|
+
result = odds.merge(runlines_odds).merge(moneyline_odds)
|
441
|
+
|
442
|
+
result.each { |k,v| result[k] = result[k].to_s.to_f if result[k] }
|
443
|
+
get_home_and_away(result)
|
444
|
+
|
445
|
+
end
|
446
|
+
|
447
|
+
# Utility method for scraping current lines
|
448
|
+
# * filling the home/away lines
|
449
|
+
def get_home_and_away(result)
|
450
|
+
result['away_line'] = -result['home_line'] if result['home_line']
|
451
|
+
result['home_line'] = -result['away_line'] if result['away_line']
|
452
|
+
result
|
453
|
+
end
|
454
|
+
|
455
|
+
# Utility method for scraping current lines
|
456
|
+
# * parsing the odds to get a number
|
457
|
+
def odds_reader(odds)
|
458
|
+
case odds&.strip when '',nil then nil when 'PK' then 0 else odds.to_f end
|
459
|
+
end
|
460
|
+
|
461
|
+
# Utility method for scraping current lines
|
462
|
+
# * is the game a doubleheader
|
463
|
+
def doubleheader_id(content)
|
464
|
+
dh = RegularExpressions::DOUBLEHEADER.match(content)
|
465
|
+
dh ? dh[:id] : nil
|
466
|
+
end
|
467
|
+
|
468
|
+
################################################
|
469
|
+
# Gets the schedule and results for a team page
|
470
|
+
def scrape_team_page(url, team)
|
471
|
+
|
472
|
+
games = Nokogiri::HTML(open(url)).css('.main-content-cell table:nth-child(5) table').css('tr').each_with_index.map do |row,index|
|
473
|
+
|
474
|
+
next if index == 0
|
475
|
+
game = Game.new(vegas_info: {})
|
476
|
+
opponent = nil
|
477
|
+
|
478
|
+
row.css('td').each_with_index do |cell,m|
|
479
|
+
|
480
|
+
case m
|
481
|
+
when 0 then game.update(time: get_game_date(cell,row))
|
482
|
+
when 1
|
483
|
+
info = get_game_info(cell, team)
|
484
|
+
opponent = info[:opponent]
|
485
|
+
game.update(info[:game_info])
|
486
|
+
end
|
487
|
+
|
488
|
+
if game_finished?(row)
|
489
|
+
case m
|
490
|
+
when 2
|
491
|
+
formatted = odds_reader(remove_element_whitespace(cell))
|
492
|
+
home_team = (game.home_or_away_team(team) == :home)
|
493
|
+
if moneyline_sport
|
494
|
+
home_team ? game.update(vegas_info: {home_moneyline: formatted}) : game.update(vegas_info: {away_moneyline: formatted})
|
495
|
+
else
|
496
|
+
home_line = (formatted && !home_team) ? -formatted : formatted
|
497
|
+
game.update(vegas_info: {home_line: home_line, away_line: (home_line ? -home_line : nil)})
|
498
|
+
end
|
499
|
+
|
500
|
+
when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell)})
|
501
|
+
when 4 then game.update(game_results(cell, team, opponent))
|
502
|
+
when 5 then game.update(ats_results(cell, team, opponent))
|
503
|
+
end
|
504
|
+
end
|
505
|
+
end
|
506
|
+
game
|
507
|
+
end
|
508
|
+
{ team: team, games: games.compact.map{ |game| game } }
|
509
|
+
end
|
510
|
+
|
511
|
+
# Utility method for scraping team page results
# * gets the date of the game, accounting for different years:
#   finished games dated in a "future" month belong to last year,
#   upcoming games dated in a "past" month belong to next year.
# Returns a Time.
def get_game_date(date_string, row)
  # BUGFIX: the original used gsub!, which returns nil when nothing is
  # replaced — Date.strptime would then raise for whitespace-free content.
  date = Date.strptime(date_string.content.gsub(/\s+/, ""), "%b%e")
  if game_finished?(row) && date.month > Date.today.month
    date = Date.new(Date.today.year - 1, date.month, date.day)
  elsif !game_finished?(row) && date.month < Date.today.month
    date = Date.new(Date.today.year + 1, date.month, date.day)
  end
  date.to_time
end
|
522
|
+
|
523
|
+
# Utility method for scraping team page results.
# A game counts as finished when the 5th column (the results cell)
# matches the GAME_RESULTS pattern (a score, "Postponed", or "Cancelled").
def game_finished?(row)
  result_text = remove_element_whitespace(row.at_css('td:nth-child(5)'))
  match = RegularExpressions::GAME_RESULTS.match(result_text)
  !match.to_s.empty?
end
|
528
|
+
|
529
|
+
# Utility method for scraping team page results.
# Reads the matchup cell: a leading "@" means this team is on the road.
# The opponent comes from the team link when present, otherwise from the
# cell text. Returns { opponent:, game_info: {doubleheader:, home_team:, away_team:} }.
def get_game_info(cell, primary_team)
  link = cell.at_css('a')
  plays_at_home = remove_element_whitespace(cell)[0] != "@"
  opponent = link ? team_url_parser(link.attribute('href')) : custom_opponent_identifier(cell)
  dh = matchdata_to_hash(RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content))['doubleheader']

  {
    opponent: opponent,
    game_info: {
      doubleheader: dh,
      home_team: plays_at_home ? primary_team : opponent,
      away_team: plays_at_home ? opponent : primary_team,
    }
  }
end
|
545
|
+
|
546
|
+
# Utility method for scraping team page results.
# Parses the results cell into a hash with :ending, :winning_team,
# :winning_score and :losing_score. The parsed result symbol
# (downcased by matchdata_to_hash) decides who won; for
# Postponed/Cancelled games :ending carries the matched text instead.
def game_results(cell, primary_team, opponent)
  match = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  parsed = matchdata_to_hash(match)
  outcome = parsed['result']

  winner, win_score, lose_score =
    case outcome
    when :won  then [primary_team, match['team_score'], match['oppo_score']]
    when :lost then [opponent, match['oppo_score'], match['team_score']]
    else [nil, nil, nil]
    end

  {
    ending: (outcome ? :ended : match.to_s),
    winning_team: winner,
    winning_score: win_score,
    losing_score: lose_score,
  }
end
|
558
|
+
|
559
|
+
# Utility method for scraping team page results.
# Parses the against-the-spread cell (e.g. "W/O") into the ATS winner
# and the over/under result; either part may be absent.
def ats_results(cell, primary_team, opponent)
  parsed = matchdata_to_hash(RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell)))
  winner =
    case parsed['ats_result']
    when :win  then primary_team
    when :loss then opponent
    end
  { ats_winner: winner, over_under_result: parsed['ou_result'] }
end
|
569
|
+
|
570
|
+
# Utility method for scraping team page results.
# Builds a slug-style identifier for opponents that have no team link:
# whitespace runs become "-", a leading "@-" (away marker) is dropped,
# and the final two characters (trailing record/rank residue) are cut.
def custom_opponent_identifier(cell)
  slug = cell.content.strip
  slug = slug.gsub(/(\s|&nbsp;)+/, '-')
  slug = slug.gsub('@-', '')
  slug.downcase[0..-3]
end
|
575
|
+
|
576
|
+
# General Utility Method
# Pulls the team identifier (the path segment after "/team/") out of a
# team URL. NOTE: raises NoMethodError when the URL does not match —
# existing behavior, callers pass known team links.
def team_url_parser(url)
  pattern = %r{.+/team/(?<team_name>(\w|-)+)}
  pattern.match(url)[:team_name]
end
|
581
|
+
|
582
|
+
# General Utility Method
# Removes whitespace (including literal "&nbsp;" entities) from the
# element's text content.
# * only_end: strip only leading/trailing whitespace; otherwise remove
#   every whitespace run. Returns the cleaned String.
def remove_element_whitespace(element, only_end = false)
  # The original ended with `string.empty? ? '' : string`, which returns
  # the same value in both branches — return the result directly.
  element.content.gsub(only_end ? /^(\s|&nbsp;)+|(\s|&nbsp;)+\z/ : /(\s|&nbsp;)+/, '')
end
|
588
|
+
|
589
|
+
# Converts a MatchData into a hash of named captures: keys are the
# capture names (Strings), values are the captured text downcased and
# symbolized. Unmatched captures are dropped; a nil MatchData yields {}.
def matchdata_to_hash(matchdata)
  return {} unless matchdata

  pairs = matchdata.names.map do |name|
    value = matchdata[name]
    [name, value && value.downcase.to_sym]
  end
  Hash[pairs].compact
end
|
592
|
+
|
593
|
+
# Regular Expressions Module
# Named patterns used by the scraper to pull records, game times, odds,
# doubleheader markers, and results out of raw page text.
module RegularExpressions
  # Team records: "W-L"; NFL adds ties; NHL adds OT and shootout losses.
  RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # Game start time, e.g. "04/12 7:05 PM".
  TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
  # Total for moneyline sports, e.g. "8.5o" / "7u" (o = over, u = under).
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x

  # Spread/total odds cell (extended mode: insignificant whitespace ignored).
  # NOTE(review): <home_line>, <away_line>, <over_under> are intentionally
  # repeated across alternatives — duplicate group names are legal in
  # Ruby's regexp engine; only the matched alternative populates them.
  ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
         ((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(.5)?)-\d\d\z)|
         ((?<away_line>-\d+(.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
  # Run line: spread/price pairs for away then home (prices are 3+ digit American odds).
  RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
  # Moneylines for away then home, optionally preceded by a total with juice.
  MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/

  # Doubleheader markers: "DH Gm N" on lines pages, "(DH N)" on results pages.
  DOUBLEHEADER = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  # Final result: non-digit result prefix plus score (the prefix is
  # downcased elsewhere and compared against :won/:lost), or a
  # Postponed/Cancelled marker with no captures.
  GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  # ATS/total result, e.g. "W/O" — the ATS part ("W/") is optional.
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
|
614
|
+
|
615
|
+
# Value object for a single scheduled or completed game, built up
# incrementally as the scraper reads schedule, odds, and result cells.
class Game
  # Single source of truth for the allowed attributes: drives both the
  # readers and the sanitize whitelist (previously duplicated in both).
  PERMITTED_KEYS = [:time, :away_team, :home_team, :vegas_info,
                    :ending, :winning_team, :winning_score, :losing_score,
                    :ats_winner, :over_under_result, :doubleheader, :notes].freeze

  attr_reader(*PERMITTED_KEYS)

  # Builds a game from an attribute hash; unknown keys are silently dropped.
  def initialize(args = {})
    Game.sanitize(args).each { |attribute, value| instance_variable_set("@#{attribute}", value) }
  end

  # Merges new attributes into the game and returns self for chaining.
  # For :vegas_info, previously-set keys win over incoming ones
  # (value.merge(vegas_info)), so earlier odds are never clobbered.
  def update(args = {})
    Game.sanitize(args).each do |attribute, value|
      new_val = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
      instance_variable_set("@#{attribute}", new_val)
    end
    self
  end

  # True-ish when both teams have been identified.
  def teams_found?
    home_team && away_team
  end

  # Returns the game in +games+ equal to this one (see #==), or nil.
  def find_equal(games)
    games.detect { |g| g == self }
  end

  # Games are equal when both teams, the calendar date, and the
  # doubleheader id all match.
  def ==(other_game)
    home_team == other_game.home_team && away_team == other_game.away_team && time.to_date == other_game.time.to_date && doubleheader == other_game.doubleheader
  end

  # Returns :home or :away for the given team identifier, nil when the
  # team is not playing in this game.
  def home_or_away_team(team)
    case team
    when home_team then :home
    when away_team then :away
    end
  end

  # Serializes every set attribute into a symbol-keyed hash.
  def as_json
    instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
  end

  # NOTE(review): a bare `private` does not affect `def self.` singleton
  # methods, so sanitize has always been a public class method — and must
  # stay callable with an explicit receiver, since the instance methods
  # above invoke it as Game.sanitize.
  private

  # Filters an argument hash down to the permitted attribute keys.
  def self.sanitize(args)
    args.select { |key, _| PERMITTED_KEYS.include?(key) }
  end
end
|
554
661
|
|
555
662
|
end
|
556
663
|
|
@@ -561,7 +668,7 @@ class Array
|
|
561
668
|
end
|
562
669
|
|
563
670
|
# Backport-style monkey patch: Hash#compact returns a copy of the hash
# with all nil-valued pairs removed (false values are kept). Ruby 2.4+
# ships an identically-behaving native Hash#compact, which this shadows.
class Hash
  def compact
    reject { |_, value| value.nil? }
  end
end
|