vegas_insider_scraper 0.0.15 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/sports/mlb.rb +10 -10
- data/lib/sports/nba.rb +5 -5
- data/lib/sports/ncaabb.rb +16 -16
- data/lib/sports/ncaafb.rb +69 -65
- data/lib/sports/nfl.rb +5 -5
- data/lib/sports/scraper_league.rb +656 -549
- data/lib/vegas_insider_scraper.rb +11 -6
- metadata +22 -14
- data/lib/sports/nhl.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 538bf0d9d798e8602915f700679ad9541dcad4b6f279cf4fd8a734030d556821
|
4
|
+
data.tar.gz: 4d0d701086e782e36ecc3524145a30189627f1940d4b5c5b6473bc4a1cb167e9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4b0a4db84a2ce082e21de78de71c02a51f12c8c1bc4325f60151c9547cdb06ffa1ebe93b42bbae7d5206b433f76284a792d6c17f36585cb8eecde12422f95b4
|
7
|
+
data.tar.gz: b6a1ff071b8d3675cf318468c45fbc4a8806eb750cbd09be8a0656dbb9415b6b4229a978a8cf90febfec68a0c051c20e0d072d754b4db3af840833b09936a597
|
data/lib/sports/mlb.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
|
2
2
|
# League scraper configured for Major League Baseball.
#
# Sets the identifiers read by ScraperLeague and marks the league as a
# moneyline sport.
class MLB < ScraperLeague
  def initialize
    @sport_id = 4
    @sport_name = :mlb
    super
    # Assigned after `super`: ScraperLeague#initialize sets this flag to false.
    @moneyline_sport = true
  end

  # Current games (memoized), built from the run-line odds page followed by
  # the default Las Vegas odds page.
  def current_games
    @current_games ||= get_lines([
      "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/run/",
      "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/"
    ])
  end
end
|
data/lib/sports/nba.rb
CHANGED
data/lib/sports/ncaabb.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
|
2
2
|
# League scraper configured for NCAA basketball.
class NCAABB < ScraperLeague
  def initialize
    @sport_id = 1
    @sport_name = 'college-basketball'
    super
  end

  # Disabled one-off maintenance task, kept for reference.
  #
  # def get_nicknames
  #   start_time = Time.now
  #   num_successes = 0
  #   Team.ncaabb_teams.each_with_index do |team, i|
  #     url = "http://www.vegasinsider.com/college-basketball/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
  #     nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
  #     team.nickname = nickname
  #     team.save
  #   end
  #   Time.now - start_time
  # end
end
|
data/lib/sports/ncaafb.rb
CHANGED
@@ -1,80 +1,84 @@
|
|
1
1
|
|
2
2
|
# League scraper configured for NCAA football.
class NCAAFB < ScraperLeague
  def initialize
    @sport_id = 0
    @sport_name = 'college-football'
    super
  end

  # Teams (memoized), taken from the teams page via `scrape_teams` rather
  # than from the standings.
  def teams
    @teams ||= scrape_teams
  end

  # The methods below are disabled one-off maintenance tasks, kept for
  # reference.

  # def get_nicknames
  #   start_time = Time.now
  #   Team.where(sport_id: 0).each_with_index do |team, i|
  #     next if team.nickname
  #     url = "http://www.vegasinsider.com/college-football/teams/team-page.cfm/team/#{team.vegas_insider_identifier}"
  #     nickname = Scraper.scrape_team_page_for_nickname(team.vegas_insider_identifier, url)
  #     team.nickname = nickname
  #     team.save
  #   end
  #   Time.now - start_time
  # end

  # def get_locations
  #   start_time = Time.now
  #   Team.where(sport_id: 0, custom_team_flag: 1).each_with_index do |team, i|
  #     team.location = nil
  #     team.save
  #   end
  #   Time.now - start_time
  # end

  # def scrape_custom_team_page_for_location(vegas_identifier, url)
  #   doc = Nokogiri::HTML(open(url))
  #   title = doc.at_css('h1.page_title').content.gsub(' Team Page', '')
  #   return title
  # end

  # def remove_nickname_from_location
  #   start_time = Time.now
  #   Team.where(sport_id: 0).each_with_index do |team, i|
  #     puts team.location
  #     puts team.location.gsub(" #{team.nickname}", '')
  #   end
  #   Time.now - start_time
  # end

  # def scrape_fcs_teams
  #   url = 'http://www.vegasinsider.com/college-football/teams/'
  #   doc = Nokogiri::HTML(open(url))
  #
  #   current_conference = nil
  #   fcs = []
  #
  #   doc.css('.main-content-cell table table table').each_with_index do |col,i|
  #     col.css('tr').each do |row|
  #       new_conference = row.at_css('td.viSubHeader1')
  #
  #       if new_conference
  #         current_conference = new_conference.content
  #       else
  #         team = row.at_css('a')
  #         if team
  #           team_formatted = {
  #             team_name: team.content,
  #             team_url_id: team_url_parser(team.attribute('href')),
  #             conference: current_conference,
  #             league: sport_id
  #           }
  #           puts team_formatted
  #           fcs.push team_formatted
  #         end
  #       end
  #     end
  #   end
  #
  #   Team.save_teams(fcs)
  #   return true
  #
  # end
end
|
data/lib/sports/nfl.rb
CHANGED
data/lib/sports/scraper_league.rb
CHANGED
@@ -3,554 +3,661 @@ require 'open-uri'
|
|
3
3
|
|
4
4
|
class ScraperLeague
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
551
|
-
|
552
|
-
|
553
|
-
|
6
|
+
# Read-only league configuration. `teams` gets no generated reader because
# the class defines an explicit memoizing `teams` method, which would
# override it anyway.
attr_reader :sport_id, :sport_name, :moneyline_sport
|
10
|
+
|
11
|
+
# Leagues are point-spread sports unless a subclass sets the flag to true
# after calling `super`.
def initialize
  @moneyline_sport = false
end
|
14
|
+
|
15
|
+
# Teams for the league (memoized); by default the same data as `standings`.
def teams
  @teams ||= standings
end
|
18
|
+
|
19
|
+
# Standings for the league (memoized), scraped on first use.
def standings
  @standings ||= scrape_standings
end
|
22
|
+
|
23
|
+
# Gets the upcoming/current games for the sport
|
24
|
+
# Upcoming/current games for the sport (memoized), built from the default
# Las Vegas odds page followed by the moneyline odds page.
def current_games
  @current_games ||= get_lines([
    "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/",
    "http://www.vegasinsider.com/#{sport_name}/odds/las-vegas/money/"
  ])
end
|
27
|
+
|
28
|
+
# Gets all of the schedule and results for each team
|
29
|
+
# Schedule and results for every team (memoized).
#
# Scrapes one team page per entry in `teams` and prints a progress line to
# stdout for each team before fetching it.
def team_schedules
  @team_schedules ||= teams.map do |team|
    info = team[:info]
    puts " ### GETTING GAMES FOR: #{info[:full_name]}"
    url = "http://www.vegasinsider.com/#{sport_name}/teams/team-page.cfm/team/#{info[:identifier]}"
    scrape_team_page(url, info[:identifier])
  end
end
|
36
|
+
|
37
|
+
# Scrapes the scoreboard and returns the parsed games.
#
# Previously the result was stored in @live_scores and `nil` was returned, so
# callers could never see the scores. The result is deliberately not
# memoized: each call re-fetches.
#
# FIXME: the URL is a hard-coded archived MLB scoreboard snapshot, so every
# league currently gets the same MLB page regardless of `sport_name`.
def live_scores
  @live_scores = get_live_scores("https://web.archive.org/web/20170704205945/http://www.vegasinsider.com/mlb/scoreboard/")
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# Scrapes the league's teams page and returns one hash per link found in the
# main content cell: { info: <format_college_team result>, grouping: { conference: ... } }.
#
# The conference is the content of the nearest preceding sibling row whose
# first cell carries the 'viSubHeader1' class. Unlike the earlier version,
# a cell without a class attribute, or running out of preceding rows, no
# longer raises; the conference is nil when no header row is found.
def scrape_teams
  url = "http://www.vegasinsider.com/#{sport_name}/teams/"
  # URI.open: Kernel#open no longer opens URLs on Ruby 3+.
  doc = Nokogiri::HTML(URI.open(url)).at_css('.main-content-cell')

  doc.css('a').map do |team_link|
    team = { info: format_college_team(team_link, doc) }

    header_cell = nil
    row = team_link.parent.parent.previous
    while row
      cell = row.at_css('td')
      if cell && cell['class'].to_s.include?('viSubHeader1')
        header_cell = cell
        break
      end
      row = row.previous
    end

    team[:grouping] = { conference: header_cell&.content }
    team
  end
end
|
60
|
+
|
61
|
+
######################################################
|
62
|
+
# Gets the teams and scrapes the records for the teams
|
63
|
+
# Scrapes the standings page and returns one team hash per standings row
# (see `scrape_standings_row`). The teams page is fetched as well because the
# college row parser needs it to resolve full team names.
def scrape_standings
  base = "http://www.vegasinsider.com/#{sport_name}"
  # URI.open: Kernel#open no longer opens URLs on Ruby 3+.
  doc = Nokogiri::HTML(URI.open("#{base}/standings/")).at_css('.main-content-cell')
  teams_doc = Nokogiri::HTML(URI.open("#{base}/teams/")).at_css('.main-content-cell')

  standings_teams = []
  doc.css(standings_table_class).each do |conference|
    conference_title = conference.at_css('.viHeaderNorm')
    next if conference_title.nil?

    tables = conference.css('.viBodyBorderNorm table')
    # College-football 'Conference USA' is special-cased to table index 2.
    special_case = conference_title.content == 'Conference USA' && sport_name == 'college-football'
    table = special_case ? tables[2] : tables[standings_table_index]
    next unless table

    table.css('tr').each do |row|
      # Skip header rows.
      next if row.at_css('.viSubHeader1') || row.at_css('.viSubHeader2')

      # The grouping is parsed per row so every team gets its own hash.
      grouping = conference_division_parser(conference_title.content)
      standings_teams.push(scrape_standings_row(row, grouping, teams_doc))
    end
  end
  standings_teams
end
|
85
|
+
|
86
|
+
# Utility method for scraping standings
|
87
|
+
# * gets the standings table class
|
88
|
+
# CSS selector for the per-conference standings containers:
# '.SLTables1' for college sports, plain 'table' otherwise.
def standings_table_class
  college_sport? ? '.SLTables1' : 'table'
end
|
91
|
+
|
92
|
+
# Utility method for scraping standings
|
93
|
+
# * gets the index of the table
|
94
|
+
# Index of the standings table inside a conference container:
# 1 for college sports, 0 otherwise.
def standings_table_index
  college_sport? ? 1 : 0
end
|
97
|
+
|
98
|
+
# Utility method for scraping standings
|
99
|
+
# * gets the standings table class
|
100
|
+
# Splits a standings header title into { conference:, division: }.
#
# College titles are the conference name with no division. Pro titles are
# expected to look like "<conference> - <division>"; a pro title without
# " - " used to raise NoMethodError on the nil match and is now treated as a
# conference with no division.
def conference_division_parser(title)
  return { conference: title, division: nil } if college_sport?

  result = /(?<conference>.+) - (?<division>.+)/.match(title)
  return { conference: title, division: nil } unless result

  { conference: result[:conference], division: result[:division] }
end
|
108
|
+
|
109
|
+
|
110
|
+
# Utility method for scraping standings
|
111
|
+
# * is a college sport?
|
112
|
+
# True when `sport_name` is one of the two college sport names.
def college_sport?
  %w[college-football college-basketball].include?(sport_name)
end
|
115
|
+
|
116
|
+
# Utility method for scraping standings
|
117
|
+
# * scrapes a row of the standings, chooses a helper method based on the league
|
118
|
+
# Parses one standings row with the parser that matches `sport_id`, then
# attaches the given grouping to the resulting team hash.
#
# NOTE(review): a sport_id outside 0..5 leaves `team` nil and the grouping
# assignment raises NoMethodError.
def scrape_standings_row(row, grouping, teams_doc)
  team_shell = { info: {}, record: {} }
  team =
    case sport_id
    when 0, 1 then college_standings_row_parser(row, team_shell, teams_doc)
    when 2    then nfl_standings_row_parser(row, team_shell)
    when 3, 4 then pro_standings_row_parser(row, team_shell)
    when 5    then hockey_standings_row_parser(row, team_shell)
    end
  team[:grouping] = grouping
  team
end
|
129
|
+
|
130
|
+
# Utility method for scraping standings
|
131
|
+
# * scrapes a row of the standings, for COLLEGE sports
|
132
|
+
# Fills `team` from a college standings row and returns it.
#
# Cell 0 holds the team link; cells 5/6, 9/10 and 13/14 are read as the
# overall, home and away win/loss counts respectively.
def college_standings_row_parser(row, team, teams_doc)
  record_keys = {
    5 => :overall_wins, 6 => :overall_losses,
    9 => :home_wins, 10 => :home_losses,
    13 => :away_wins, 14 => :away_losses
  }

  row.css('td').each_with_index do |cell, cell_index|
    value = remove_element_whitespace(cell)
    if cell_index.zero?
      team[:info] = format_college_team(cell.at_css('a'), teams_doc)
    elsif record_keys.key?(cell_index)
      team[:record][record_keys[cell_index]] = value.to_i
    end
  end
  team
end
|
148
|
+
|
149
|
+
# Utility method for scraping standings
|
150
|
+
# * scrapes a row of the standings, for NFL
|
151
|
+
# Fills `team` from an NFL standings row and returns it.
#
# Cells 1-3 are the overall wins/losses/ties; cells 7 and 8 hold the home
# and away records, parsed with NFL_RECORD_REGEX. The home/away values are
# now converted with `to_i`: previously they were Strings when the regex
# matched but Integers (0) when it did not, unlike the overall columns.
def nfl_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)
    record = team[:record]

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then record[:overall_wins] = content.to_i
    when 2 then record[:overall_losses] = content.to_i
    when 3 then record[:overall_ties] = content.to_i
    when 7, 8
      side = cell_index == 7 ? 'home' : 'away'
      split = RegularExpressions::NFL_RECORD_REGEX.match(content) || { wins: 0, losses: 0, ties: 0 }
      record[:"#{side}_wins"] = split[:wins].to_i
      record[:"#{side}_losses"] = split[:losses].to_i
      record[:"#{side}_ties"] = split[:ties].to_i
    end
  end
  team
end
|
174
|
+
|
175
|
+
# Utility method for scraping standings
|
176
|
+
# * scrapes a row of the standings, for PRO (MLB)
|
177
|
+
# Fills `team` from a pro standings row (used for sport ids 3 and 4) and
# returns it.
#
# Cells 1-2 are the overall wins/losses; cells 5 and 6 hold the home and
# away records, parsed with RECORD_REGEX. The home/away values are now
# converted with `to_i`: previously they were Strings when the regex
# matched but Integers (0) when it did not, unlike the overall columns.
def pro_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)
    record = team[:record]

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then record[:overall_wins] = content.to_i
    when 2 then record[:overall_losses] = content.to_i
    when 5, 6
      side = cell_index == 5 ? 'home' : 'away'
      split = RegularExpressions::RECORD_REGEX.match(content) || { wins: 0, losses: 0 }
      record[:"#{side}_wins"] = split[:wins].to_i
      record[:"#{side}_losses"] = split[:losses].to_i
    end
  end
  team
end
|
197
|
+
|
198
|
+
# Utility method for scraping standings
|
199
|
+
# * scrapes a row of the standings, for NHL
|
200
|
+
# Fills `team` from an NHL standings row and returns it.
#
# Cells 1-5 are wins, losses, overtime losses, shootout losses and points;
# cells 8 and 9 hold the home and away records, parsed with
# NHL_RECORD_REGEX. The home/away values are now converted with `to_i`:
# previously they were Strings when the regex matched but Integers (0) when
# it did not, unlike the overall columns.
def hockey_standings_row_parser(row, team)
  row.css('td').each_with_index do |cell, cell_index|
    content = remove_element_whitespace(cell)
    record = team[:record]

    case cell_index
    when 0 then team[:info] = format_team(cell.at_css('a'))
    when 1 then record[:overall_wins] = content.to_i
    when 2 then record[:overall_losses] = content.to_i
    when 3 then record[:over_time_losses] = content.to_i
    when 4 then record[:shootout_losses] = content.to_i
    when 5 then record[:points] = content.to_i
    when 8, 9
      side = cell_index == 8 ? 'home' : 'away'
      split = RegularExpressions::NHL_RECORD_REGEX.match(content) ||
              { wins: 0, losses: 0, ot_losses: 0, shootout_losses: 0 }
      record[:"#{side}_wins"] = split[:wins].to_i
      record[:"#{side}_losses"] = split[:losses].to_i
      record[:"#{side}_over_time_losses"] = split[:ot_losses].to_i
      record[:"#{side}_shootout_losses"] = split[:shootout_losses].to_i
    end
  end
  team
end
|
227
|
+
|
228
|
+
# Utility method for scraping standings
|
229
|
+
# * formats the team using the URL
|
230
|
+
# Builds the team info hash from a team link element.
#
# The nickname is the humanized URL identifier, and the location is the
# link text with " <nickname>" removed.
def format_team(url)
  full_name = url.content
  href = url.attribute('href')
  identifier = team_url_parser(href)
  nickname = humanize_identifier(identifier)

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: href.value
  }
end
|
243
|
+
|
244
|
+
# Utility method for scraping standings
|
245
|
+
# * formats the team using the URL and the Nokogiri document for the teams page
|
246
|
+
# Builds the team info hash for a college team link, using the teams page
# document to look up the full team name.
#
# The nickname is derived in up to three steps:
#   1. remove "<location> " (the link text, with AM/AT expanded to A&M/A&T)
#      from the full name;
#   2. if that changed nothing, remove the humanized identifier instead;
#   3. if that also changed nothing, keep the second half of the words and
#      apply `nickname_exceptions`.
# Step 3 used to raise when the remaining name had one word (nil second
# slice) or none (`each_slice(0)`); such names are now kept as they are.
def format_college_team(url, teams_doc)
  full_name = team_page_full_name(teams_doc, url)
  location = url.content.gsub('AM', 'A&M').gsub('AT', 'A&T')
  href = url.attribute('href')
  identifier = team_url_parser(href)
  nickname = full_name.gsub("#{location} ", '')

  if nickname == full_name
    nickname = full_name.gsub('&', '').gsub(humanize_identifier(identifier), '').strip
  end

  if nickname == full_name.gsub('&', '').strip
    words = nickname.split(' ')
    if words.size > 1
      nickname = words.each_slice((words.size / 2.0).round).to_a[1].join(' ')
    end
    nickname = nickname_exceptions(identifier, nickname)
  end

  {
    identifier: identifier,
    nickname: nickname,
    location: full_name.gsub(" #{nickname}", ''),
    full_name: full_name,
    url: href.value
  }
end
|
270
|
+
|
271
|
+
# Turns a dash-separated identifier into capitalized words,
# e.g. 'red-sox' => 'Red Sox'.
def humanize_identifier(identifier)
  identifier.split('-').map(&:capitalize).join(' ')
end
|
274
|
+
|
275
|
+
# Returns the hand-maintained nickname for the listed identifiers, or the
# given nickname unchanged for any other identifier.
def nickname_exceptions(identifier, nickname)
  case identifier
  when 'california-state-long-beach'    then '49ers'
  when 'texas-am-corpus-christi'        then 'Islanders'
  when 'southern-am'                    then 'Jaguars'
  when 'saint-marys-college-california' then 'Gaels'
  else nickname
  end
end
|
283
|
+
|
284
|
+
# Utility method for scraping standings
|
285
|
+
# * gets the full team name using the teams page
|
286
|
+
# Full team name: the text of the link in `doc` whose href equals the href
# of `url`.
#
# NOTE(review): raises NoMethodError when `doc` contains no such link.
def team_page_full_name(doc, url)
  href = url.attribute('href')
  doc.at_css("a[href='#{href}']").content
end
|
289
|
+
|
290
|
+
##########################################
|
291
|
+
# Gets the current lines for a given sport
|
292
|
+
# Scrapes a scoreboard page and returns an array of game hashes with
# :away_team, :home_team, :status, :date and (when a status is present)
# :scoring, a list of { period:, away:, home: } entries.
#
# Status mapping: 'Final Score' => :ended, 'PPD' => :Postponed,
# '' => :Cancelled, text containing 'Game Time' => nil, anything else is
# kept as the raw text.
#
# Changes from the earlier version: the page is opened with URI.open
# (Kernel#open no longer opens URLs on Ruby 3+), leftover debugging `puts`
# calls are gone, and each cell's text is normalized once instead of three
# times.
def get_live_scores(url)
  doc = Nokogiri::HTML(URI.open(url))

  header = doc.at_css('.ff_txt2 tr:nth-child(2) font')
  date = header && Date.strptime(header.content, '%a, %b %d')

  games = []
  doc.css('.SLTables4 table > tr').each do |row|
    # Rows with a valign attribute are date headers for the rows after them.
    if row.attribute('valign')
      date = parse_score_date(row)
      next
    end

    row.css('.yeallowBg .sportPicksBorder').each do |game|
      result = {}
      game.css('.tanBg a').each_with_index do |team, i|
        key = i.zero? ? :away_team : :home_team
        result[key] = team_url_parser(team.attribute('href'))
      end

      status_text = remove_element_whitespace(game.at_css('.sub_title_red'), true)
      game_status =
        case status_text
        when 'Final Score' then :ended
        when 'PPD'         then :Postponed
        when ''            then :Cancelled
        else status_text.include?('Game Time') ? nil : status_text
        end

      if game_status
        segment_titles = game.css('.sportPicksBg td')
                             .map { |col| remove_element_whitespace(col) }
                             .reject { |title| ['Teams', 'Odds', 'ATS', ''].include?(title) }
        team_rows = game.css('.tanBg')
        away_values = live_score_values(team_rows[0])
        home_values = live_score_values(team_rows[1])

        result[:scoring] = segment_titles.each_with_index.map do |title, i|
          { period: title, away: away_values[i], home: home_values[i] }
        end
      end

      result[:status] = game_status
      result[:date] = date
      games.push(result)
    end
  end
  games
end

# Integer values of one team's scoreboard row: the first three cells are
# skipped and the last collected value is dropped.
def live_score_values(team_row)
  values = team_row.css('td').drop(3).map { |col| remove_element_whitespace(col).to_i }
  values.pop
  values
end
|
369
|
+
|
370
|
+
# Parses a scoreboard date header such as "Week 3 - Sunday September 24, 2017"
# into a Date; a leading "Week N - " prefix is removed first.
def parse_score_date(element)
  text = remove_element_whitespace(element, true)
  Date.strptime(text.gsub(/Week\s+\d+\s+-\s/, ''), '%A %B %d, %Y')
end
|
374
|
+
|
375
|
+
##########################################
|
376
|
+
# Gets the current lines for a given sport
|
377
|
+
# Scrapes the odds pages in `urls` and returns the games found.
#
# Games are created from the first page that yields any; rows on later
# pages only add odds to the matching game already collected. On the first
# page, a row without two recognised teams is appended as a note to the
# previous game.
#
# Changes from the earlier version: pages are opened with URI.open
# (Kernel#open no longer opens URLs on Ruby 3+); rows without a first cell
# are skipped; and a later-page row is skipped when `find_equal` finds no
# counterpart (presumably it returns nil in that case - confirm against
# Game) instead of raising on nil.
def get_lines(urls)
  games = []

  urls.each do |url|
    is_first_url = games.empty?
    doc = Nokogiri::HTML(URI.open(url))

    doc.css('.viBodyBorderNorm .frodds-data-tbl tr').each do |game_row|
      game_cell = game_row.at_css('td:first-child')
      next unless game_cell

      identifiers = game_cell_parser(game_cell)
      game = Game.new(home_team: identifiers[1], away_team: identifiers[0])

      if game.teams_found?
        game.update(time: get_game_time(game_cell))
        # The doubleheader marker is read from the first cell two siblings on.
        game.update(doubleheader: doubleheader_id(game_row.next&.next&.at_css('td:first-child')&.content))

        if is_first_url
          games.push(game)
        else
          game = game.find_equal(games)
          next unless game
        end

        game.update(vegas_info: get_line(get_odds(game_row)))
        game.update(vegas_info: get_line(get_odds_inner_html(game_row)))
      elsif is_first_url
        last_game = games.last
        if last_game
          prefix = last_game.notes ? "#{last_game.notes} / " : ''
          last_game.update(notes: prefix + game_cell.content)
        end
      end
    end
  end
  games
end
|
404
|
+
|
405
|
+
# Utility method for scraping current lines
|
406
|
+
# * find the identifier for each team
|
407
|
+
# Team identifiers for the links found under 'b a' in a game cell, in
# document order.
def game_cell_parser(cell)
  cell.css('b a').map { |team| team_url_parser(team.attribute('href')) }
end
|
410
|
+
|
411
|
+
# Utility method for scraping current lines
|
412
|
+
# * getting the time of the game
|
413
|
+
# Parses the game time from the cell's <span> into a Time, interpreted in
# US/Eastern.
#
# The page shows no year: the next calendar year is used when the game's
# month is earlier than the current month, except when it is exactly the
# previous month.
#
# The TZ environment variable is overridden only while parsing and is then
# restored to its previous value, even if parsing raises. The earlier
# version always reset it to nil, clobbering any TZ the host process had set.
def get_game_time(cell)
  time = RegularExpressions::TIME_REGEX.match(cell.at_css('span').content.to_s)
  today = Date.today
  month = time[:mo].to_i
  year = today.month > month && today.month - 1 != month ? today.year + 1 : today.year

  previous_tz = ENV['TZ']
  ENV['TZ'] = 'US/Eastern'
  begin
    Time.strptime("#{year} #{time[:mo]} #{time[:d]} #{time[:h]}:#{time[:mi]}:00 #{time[:mer]}", "%Y %m %d %r")
  ensure
    ENV['TZ'] = previous_tz
  end
end
|
422
|
+
|
423
|
+
# Utility method for scraping current lines
|
424
|
+
# * getting odds from the cell, removing whitespace, and converting 1/2 to 0.5
|
425
|
+
# Text of the link in the row's third cell with spaces removed and "½"
# rewritten as ".5"; an empty string when there is no such link.
def get_odds(odds_element)
  link = odds_element.at_css('td:nth-child(3) a')
  (link&.content || '').gsub(" ","").gsub("½",".5").strip
end
|
428
|
+
# Same clean-up as `get_odds`, applied to the link's inner HTML (re-encoded
# as UTF-8) rather than its text content.
def get_odds_inner_html(odds_element)
  link = odds_element.at_css('td:nth-child(3) a')
  (link&.inner_html || '').encode('utf-8').gsub(" ","").gsub("½",".5").strip
end
|
431
|
+
|
432
|
+
# Utility method for scraping current lines
|
433
|
+
# * parsing the lines for non-moneyline sports
|
434
|
+
# Parses an odds string into a hash of numeric line values.
#
# 'PK' is rewritten to '-0' first. The string is then matched against the
# ODDS, RUNLINE_ODDS and MONEYLINE_ODDS patterns; the captures are merged in
# that order (later patterns win on duplicate keys), non-nil values are
# converted to Float, and the home/away lines are completed with
# `get_home_and_away`.
def get_line(odds_string)
  normalized = odds_string.gsub('PK', '-0')
  patterns = [
    RegularExpressions::ODDS,
    RegularExpressions::RUNLINE_ODDS,
    RegularExpressions::MONEYLINE_ODDS
  ]

  result = patterns.reduce({}) do |merged, pattern|
    merged.merge(matchdata_to_hash(pattern.match(normalized)) || {})
  end

  result.each_key { |key| result[key] = result[key].to_s.to_f if result[key] }
  get_home_and_away(result)
end
|
446
|
+
|
447
|
+
# Utility method for scraping current lines
|
448
|
+
# * filling the home/away lines
|
449
|
+
# Completes the point-spread pair in `result` (mutated and returned): when
# 'home_line' is present, 'away_line' becomes its negation; otherwise a
# present 'away_line' fills in 'home_line'.
def get_home_and_away(result)
  if result['home_line']
    result['away_line'] = -result['home_line']
  elsif result['away_line']
    result['home_line'] = -result['away_line']
  end
  result
end
|
454
|
+
|
455
|
+
# Utility method for scraping current lines
|
456
|
+
# * parsing the odds to get a number
|
457
|
+
# Converts a scraped odds cell to a number: nil for nil or blank text,
# 0 for 'PK', otherwise the text's `to_f`.
def odds_reader(odds)
  case odds&.strip
  when '', nil then nil
  when 'PK'    then 0
  else odds.to_f
  end
end
|
460
|
+
|
461
|
+
# Utility method for scraping current lines
|
462
|
+
# * is the game a doubleheader
|
463
|
+
# Returns the :id capture of the DOUBLEHEADER pattern in `content`, or nil
# when the pattern does not match (including nil content).
def doubleheader_id(content)
  match = RegularExpressions::DOUBLEHEADER.match(content)
  match && match[:id]
end
|
467
|
+
|
468
|
+
################################################
|
469
|
+
# Gets the schedule and results for a team page
|
470
|
+
# Scrapes a team page and returns { team: <team>, games: [Game, ...] }, one
# Game per schedule row (the first row is skipped).
#
# Cell 0 is the date and cell 1 the opponent/venue. For finished games,
# cells 2-5 supply the line (or moneyline), the over/under, the result and
# the ATS result.
#
# Changes from the earlier version: the page is opened with URI.open
# (Kernel#open no longer opens URLs on Ruby 3+), `game_finished?` is only
# evaluated for the cells that depend on it, and the no-op
# `map { |game| game }` is gone.
def scrape_team_page(url, team)
  rows = Nokogiri::HTML(URI.open(url)).css('.main-content-cell table:nth-child(5) table').css('tr')

  games = rows.drop(1).map do |row|
    game = Game.new(vegas_info: {})
    opponent = nil

    row.css('td').each_with_index do |cell, m|
      case m
      when 0 then game.update(time: get_game_date(cell, row))
      when 1
        info = get_game_info(cell, team)
        opponent = info[:opponent]
        game.update(info[:game_info])
      end

      next unless (2..5).cover?(m) && game_finished?(row)

      case m
      when 2
        formatted = odds_reader(remove_element_whitespace(cell))
        home_team = (game.home_or_away_team(team) == :home)
        if moneyline_sport
          key = home_team ? :home_moneyline : :away_moneyline
          game.update(vegas_info: { key => formatted })
        else
          # The cell is from this team's point of view; flip it when away.
          home_line = (formatted && !home_team) ? -formatted : formatted
          game.update(vegas_info: { home_line: home_line, away_line: (home_line ? -home_line : nil) })
        end
      when 3 then game.update(vegas_info: { over_under: remove_element_whitespace(cell) })
      when 4 then game.update(game_results(cell, team, opponent))
      when 5 then game.update(ats_results(cell, team, opponent))
      end
    end
    game
  end

  { team: team, games: games.compact }
end
|
510
|
+
|
511
|
+
# Utility method for scraping team page results
|
512
|
+
# * gets the date of the game, accounting for different years
|
513
|
+
# Parses the date cell of a team-page row ("%b%e" once whitespace is removed)
# and picks the year relative to today.
#
# * a finished game whose month is later than the current month is placed in
#   the previous year
# * an unfinished game whose month is earlier than the current month is
#   placed in the next year
#
# @param date_string an element whose `content` holds the month and day
# @param row the table row, passed to game_finished?
# @return [Time] the game date converted with Date#to_time
def get_game_date(date_string, row)
  # gsub, not gsub!: the bang variant returns nil when the text has no
  # whitespace to remove, which made Date.strptime raise a TypeError.
  compact_text = date_string.content.gsub(/\s+/, "")
  date = Date.strptime(compact_text, "%b%e")

  today = Date.today
  finished = game_finished?(row)

  if finished && date.month > today.month
    date = Date.new(today.year - 1, date.month, date.day)
  elsif !finished && date.month < today.month
    date = Date.new(today.year + 1, date.month, date.day)
  end

  date.to_time
end
|
522
|
+
|
523
|
+
# Utility method for scraping team page results
|
524
|
+
# * determines if the game has concluded
|
525
|
+
# Tells whether the row's fifth cell holds something GAME_RESULTS recognises:
# a result with a score, or the words Postponed / Cancelled.
#
# @param row a table row responding to `at_css`
# @return [Boolean]
def game_finished?(row)
  result_cell = row.at_css('td:nth-child(5)')
  outcome = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(result_cell))
  # nil.to_s is "", so a failed match reads as "not finished".
  !outcome.to_s.empty?
end
|
528
|
+
|
529
|
+
# Utility method for scraping team page results
|
530
|
+
# * gets the home_team, away_team, and doubleheader info
|
531
|
+
# Works out the opponent, the home/away sides and the doubleheader marker
# from the opponent cell of a team-page row.
#
# A leading "@" in the whitespace-stripped cell text means primary_team is
# the away team. The opponent identifier comes from the cell's link when it
# has one, otherwise from custom_opponent_identifier.
#
# @param cell the opponent cell
# @param primary_team the identifier of the team whose page is being read
# @return [Hash] `{ opponent: ..., game_info: { doubleheader:, home_team:, away_team: } }`
def get_game_info(cell, primary_team)
  link = cell.at_css('a')
  playing_away = remove_element_whitespace(cell)[0] == "@"

  opponent =
    if link
      team_url_parser(link.attribute('href'))
    else
      custom_opponent_identifier(cell)
    end

  doubleheader_match = RegularExpressions::RESULTS_DOUBLEHEADER.match(cell.content)
  home, away = playing_away ? [opponent, primary_team] : [primary_team, opponent]

  {
    opponent: opponent,
    game_info: {
      doubleheader: matchdata_to_hash(doubleheader_match)['doubleheader'],
      home_team: home,
      away_team: away
    }
  }
end
|
545
|
+
|
546
|
+
# Utility method for scraping team page results
|
547
|
+
# * gets the result of the game
|
548
|
+
# Translates the result cell of a team-page row into winner and score fields.
#
# @param cell the result cell
# @param primary_team the identifier of the team whose page is being read
# @param opponent the identifier of the other team
# @return [Hash] with :ending, :winning_team, :winning_score, :losing_score.
#   :ending is :ended when a result was captured, otherwise the matched text
#   itself; the other three are nil unless the result is :won or :lost.
def game_results(cell, primary_team, opponent)
  results = RegularExpressions::GAME_RESULTS.match(remove_element_whitespace(cell))
  outcome = matchdata_to_hash(results)['result']

  # Pick the winner and which captured score belongs to which side; every
  # value stays nil for any other outcome.
  winner, winner_score_key, loser_score_key =
    case outcome
    when :won then [primary_team, 'team_score', 'oppo_score']
    when :lost then [opponent, 'oppo_score', 'team_score']
    end

  {
    ending: (outcome ? :ended : results.to_s),
    winning_team: winner,
    winning_score: winner_score_key && results[winner_score_key],
    losing_score: loser_score_key && results[loser_score_key]
  }
end
|
558
|
+
|
559
|
+
# Utility method for scraping team page results
|
560
|
+
# * gets the spread results
|
561
|
+
# Translates the spread-result cell of a team-page row.
#
# @param cell the spread-result cell
# @param primary_team the identifier of the team whose page is being read
# @param opponent the identifier of the other team
# @return [Hash] :ats_winner is primary_team for :win, opponent for :loss and
#   nil otherwise; :over_under_result is the captured over/under result
def ats_results(cell, primary_team, opponent)
  spread_match = RegularExpressions::SPREAD_RESULTS.match(remove_element_whitespace(cell))
  parsed = matchdata_to_hash(spread_match)

  winners_by_result = { win: primary_team, loss: opponent }

  {
    ats_winner: winners_by_result[parsed['ats_result']],
    over_under_result: parsed['ou_result']
  }
end
|
569
|
+
|
570
|
+
# Utility method for scraping team page results
|
571
|
+
# * gets the identifier for an opponent without links
|
572
|
+
# Builds an identifier for an opponent whose cell has no link: the cell text
# is stripped, runs of whitespace become "-", the "@-" away marker is removed,
# the text is lowercased and its last two characters are dropped.
#
# NOTE(review): the reason for dropping the last two characters is not
# visible here -- presumably a fixed suffix in the cell text; confirm against
# the page markup.
#
# @param cell an element responding to `content`
# @return [String] the identifier
def custom_opponent_identifier(cell)
  # The non-breaking space is spelled out as \u00A0 instead of an invisible
  # literal character, so &nbsp; separators are collapsed like any other
  # whitespace (\s alone does not match it).
  cell.content
      .strip
      .gsub(/[\s\u00A0]+/, '-')
      .gsub('@-', '')
      .downcase[0..-3]
end
|
575
|
+
|
576
|
+
# General Utility Method
|
577
|
+
# used to get the team identifier from the URL
|
578
|
+
# Extracts the team identifier that follows "/team/" in a team-page URL.
#
# @param url the URL (or link attribute) to inspect
# @return [String] the run of word characters and dashes after "/team/"
# @raise [NoMethodError] when the URL has no "/team/<name>" part, because the
#   failed match is nil
def team_url_parser(url)
  team_path = /.+\/team\/(?<team_name>(\w|-)+)/
  captures = team_path.match(url)
  captures[:team_name]
end
|
581
|
+
|
582
|
+
# General Utility Method
|
583
|
+
# used to remove all whitespace from the content of the element
|
584
|
+
# Returns the element's text with whitespace removed.
#
# @param element an element responding to `content`
# @param only_end [Boolean] when true only the whitespace at the very start
#   and very end of the text is removed; when false (default) every run of
#   whitespace is removed
# @return [String] the cleaned text ("" when nothing is left)
def remove_element_whitespace(element, only_end = false)
  # * \u00A0 spells out the non-breaking space instead of relying on an
  #   invisible literal character; \s alone does not match it.
  # * \A anchors at the start of the text. The previous ^ matched at the
  #   start of every line, so `only_end` also stripped indentation inside
  #   multi-line text even though the other side was anchored with \z.
  pattern = only_end ? /\A[\s\u00A0]+|[\s\u00A0]+\z/ : /[\s\u00A0]+/
  element.content.gsub(pattern, '')
end
|
588
|
+
|
589
|
+
# Converts the named captures of a MatchData into a Hash.
#
# Keys are the capture names (Strings); values are the captured text,
# lowercased and turned into Symbols. Captures that did not take part in the
# match are left out.
#
# @param matchdata [MatchData, nil]
# @return [Hash] empty when matchdata is nil
def matchdata_to_hash(matchdata)
  return {} unless matchdata

  matchdata.names.each_with_object({}) do |name, captures|
    text = matchdata[name]
    captures[name] = text.downcase.to_sym if text
  end
end
|
592
|
+
|
593
|
+
# Regular Expressions Module
|
594
|
+
# Patterns used to pull records, times, odds and results out of scraped text.
# All captures are named; the consumers read them by name.
module RegularExpressions
  # Win-loss records, with the extra columns some leagues add.
  RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)/
  NFL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ties>\d+)/
  NHL_RECORD_REGEX = /(?<wins>\d+)-(?<losses>\d+)-(?<ot_losses>\d+)-(?<shootout_losses>\d+)/

  # "MM/DD H:MM xx" timestamps.
  TIME_REGEX = /(?<mo>\d{2})\/(?<d>\d{2}) (?<h>\d+):(?<mi>\d{2}) (?<mer>\w{2})/
  MONEYLINE_OVER_UNDER = /(?<ou>\d+(\.5)?)[ou]/x

  # Point-spread cells. Written in extended mode, so the line breaks and
  # indentation inside the literal are not part of the pattern.
  # The half-point suffix is `\.5` in every alternative: the last two used an
  # unescaped dot, which accepted any character before the 5 (e.g. "-3x5").
  ODDS = /(<br><br>(?<home_line>-\d+(\.5)?))|(<br>(?<away_line>-\d+(\.5)?)[+-]\d\d<br>)|
          ((?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)(?<home_line>-\d+(\.5)?)-\d\d\z)|
          ((?<away_line>-\d+(\.5)?)-\d\d(?<over_under>\d+(\.5)?)[ou]((-\d{2})|EV)\z)/x
  RUNLINE_ODDS = /(?<away_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}(?<home_line>(\+|-)\d+(\.5)?)\/(\+|-)\d{3}/
  MONEYLINE_ODDS = /((?<over_under>\d+(\.5)?)[ou]-\d{2})?(?<away_moneyline>(\+|-)\d{3}\d*)(?<home_moneyline>(\+|-)\d{3}\d*)/

  # Doubleheader markers: "DH Gm <n>" and "(DH <n>)".
  DOUBLEHEADER = /DH Gm (?<id>\d)/
  RESULTS_DOUBLEHEADER = /\(DH (?<doubleheader>\d)\)/

  # Team-page result cells: "<result><score>-<score>", Postponed or Cancelled,
  # and the "<ats result>/<over-under result>" cell.
  GAME_RESULTS = /(?<result>\D+)(?<team_score>\d+)-(?<oppo_score>\d+)|(Postponed)|(Cancelled)/
  SPREAD_RESULTS = /((?<ats_result>\w+)\/)?(?<ou_result>\w+)/
end
|
614
|
+
|
615
|
+
# One scraped game: when it is played, who plays, the betting information and,
# once known, the outcome. Attributes are set through #initialize and #update
# only; there are no individual writers.
class Game
  # Every attribute a Game can hold. Keys outside this list are ignored by
  # #initialize and #update.
  ATTRIBUTES = [:time, :away_team, :home_team, :vegas_info,
                :ending, :winning_team, :winning_score, :losing_score,
                :ats_winner, :over_under_result, :doubleheader, :notes].freeze

  attr_reader(*ATTRIBUTES)

  # Keeps only the permitted attribute keys.
  #
  # This has always been publicly callable: the bare `private` that used to
  # precede it has no effect on `def self.` methods. It stays public because
  # the instance methods call it with an explicit receiver.
  #
  # @param args [Hash]
  # @return [Hash] the entries of args whose key is listed in ATTRIBUTES
  def self.sanitize(args)
    args.select { |key, _| ATTRIBUTES.include?(key) }
  end

  # @param args [Hash] initial attribute values, keyed by attribute name
  def initialize(args = {})
    Game.sanitize(args).each { |attribute, value| instance_variable_set("@#{attribute}", value) }
  end

  # Overwrites the given attributes. :vegas_info is merged instead of being
  # replaced, and on a key collision the value already stored wins.
  # NOTE(review): keeping the stored value on collision is the existing
  # behaviour -- confirm it is intended before relying on #update to correct
  # an odds entry.
  #
  # @param args [Hash] new attribute values, keyed by attribute name
  # @return [Game] self
  def update(args = {})
    Game.sanitize(args).each do |attribute, value|
      merged = (attribute == :vegas_info && value && vegas_info) ? value.merge(vegas_info) : value
      instance_variable_set("@#{attribute}", merged)
    end
    self
  end

  # @return the away team when both teams are set, otherwise nil/false
  def teams_found?
    home_team && away_team
  end

  # @param games [Enumerable<Game>]
  # @return [Game, nil] the first game in the list equal to this one
  def find_equal(games)
    games.detect { |g| g == self }
  end

  # Two games are equal when they share home team, away team, calendar day
  # and doubleheader marker. Anything that is not a Game is never equal, and
  # a missing time compares as "no date" instead of raising.
  #
  # @param other_game [Object]
  # @return [Boolean]
  def ==(other_game)
    return false unless other_game.is_a?(Game)

    home_team == other_game.home_team &&
      away_team == other_game.away_team &&
      game_date == other_game.game_date &&
      doubleheader == other_game.doubleheader
  end

  # @param team the identifier to look for
  # @return [Symbol, nil] :home, :away, or nil when the team is not playing
  def home_or_away_team(team)
    case team
    when home_team then :home
    when away_team then :away
    end
  end

  # @return [Hash] every attribute that has been set, keyed by Symbol
  def as_json
    instance_variables.each_with_object({}) { |var, hash| hash[var.to_s.delete("@").to_sym] = instance_variable_get(var) }
  end

  protected

  # The calendar day of the game, or nil when no time is set. Protected so
  # #== can read it on the other game.
  def game_date
    time && time.to_date
  end
end
|
554
661
|
|
555
662
|
end
|
556
663
|
|
@@ -561,7 +668,7 @@ class Array
|
|
561
668
|
end
|
562
669
|
|
563
670
|
class Hash
  # Backport of Hash#compact for Rubies that do not ship it (it became part
  # of the core library in Ruby 2.4). The guard keeps the built-in method
  # untouched where it already exists.
  unless method_defined?(:compact)
    # @return [Hash] a copy of the hash without the entries whose value is nil
    def compact
      select { |_, value| !value.nil? }
    end
  end
end
|