schedule-scraper 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +13 -5
- data/lib/schedule-scraper.rb +5 -1
- data/lib/schedule-scraper/event.rb +50 -0
- data/lib/schedule-scraper/ezleagues/event.rb +17 -0
- data/lib/schedule-scraper/ezleagues/schedule.rb +22 -0
- data/lib/schedule-scraper/pointstreak/event.rb +2 -41
- data/lib/schedule-scraper/pointstreak/schedule.rb +0 -4
- data/lib/schedule-scraper/schedule.rb +3 -0
- data/lib/schedule-scraper/version.rb +1 -1
- data/spec/schedule-scraper/event_spec.rb +96 -7
- data/spec/schedule-scraper/ezleagues/event_spec.rb +39 -0
- data/spec/schedule-scraper/ezleagues/schedule_spec.rb +29 -0
- data/spec/schedule-scraper/pointstreak/event_spec.rb +0 -75
- data/spec/schedule-scraper/pointstreak/schedule_spec.rb +0 -16
- data/spec/schedule-scraper/schedule_spec.rb +22 -7
- data/spec/spec_helper.rb +32 -1
- data/spec/vcr_cassettes/schwartz_summer_2012.yml +1150 -0
- metadata +10 -2
data/README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
# ScheduleScraper
|
2
2
|
|
3
3
|
[![Build Status](https://secure.travis-ci.org/johnallen3d/schedule-scrape.png?branch=master)](http://travis-ci.org/johnallen3d/schedule-scrape)
|
4
|
+
[![Code Climate](https://codeclimate.com/badge.png)](https://codeclimate.com/github/johnallen3d/schedule-scrape)
|
4
5
|
|
5
6
|
A web calendar scraper for sites that do not provide a portable (csv, i-cal etc) version.
|
6
7
|
|
7
8
|
Supported schedule sites:
|
8
9
|
|
9
10
|
* [PointStreak](http://pointstreak.com)
|
11
|
+
* [EZFacility](http://www.ezfacility.com/)
|
10
12
|
|
11
13
|
Supported output formats:
|
12
14
|
|
@@ -31,8 +33,6 @@ Or install it yourself as:
|
|
31
33
|
|
32
34
|
## Usage
|
33
35
|
|
34
|
-
### Pointstreak Example
|
35
|
-
|
36
36
|
Locate the printable version of the schedule:
|
37
37
|
|
38
38
|
1. Visit the leagues home page
|
@@ -43,7 +43,15 @@ Locate the printable version of the scheulde:
|
|
43
43
|
Request a schedule:
|
44
44
|
|
45
45
|
url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
46
|
-
|
46
|
+
schedule = ScheduleScraper.fetch(:pointstreak, url)
|
47
|
+
|
48
|
+
or
|
49
|
+
|
50
|
+
url = "http://pinevilleice.ezleagues.ezfacility.com/teams/1026121/The-Schwartz.aspx"
|
51
|
+
schedule = ScheduleScraper.fetch(:ezleagues, url)
|
52
|
+
|
53
|
+
then
|
54
|
+
|
47
55
|
|
48
56
|
Export the schedule:
|
49
57
|
|
@@ -59,11 +67,11 @@ or
|
|
59
67
|
|
60
68
|
## TODO
|
61
69
|
|
62
|
-
1. Add other schedule types
|
70
|
+
1. Add other schedule types
|
63
71
|
|
64
72
|
## Why?
|
65
73
|
|
66
|
-
To scratch an itch. I play on a couple of ice hockey teams and the rinks these sites to manage leagues and schedules. These sites do not offer any options for exporing and I got tired of updating my schedule manually every couple of months.
|
74
|
+
To scratch an itch. I play on a couple of ice hockey teams and the rinks use these sites to manage leagues and schedules. These sites do not offer any options for exporting and I got tired of updating my schedule manually every couple of months.
|
67
75
|
|
68
76
|
## Contributing
|
69
77
|
|
data/lib/schedule-scraper.rb
CHANGED
@@ -8,6 +8,8 @@ require "schedule-scraper/event"
|
|
8
8
|
require "schedule-scraper/schedule"
|
9
9
|
require "schedule-scraper/pointstreak/event"
|
10
10
|
require "schedule-scraper/pointstreak/schedule"
|
11
|
+
require "schedule-scraper/ezleagues/event"
|
12
|
+
require "schedule-scraper/ezleagues/schedule"
|
11
13
|
|
12
14
|
module ScheduleScraper
|
13
15
|
def self.fetch(type, url)
|
@@ -20,12 +22,14 @@ module ScheduleScraper
|
|
20
22
|
def self.type_class(type)
|
21
23
|
case type
|
22
24
|
when :pointstreak then Pointstreak::Schedule
|
25
|
+
when :ezleagues then EZLeagues::Schedule
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
26
29
|
def self.supported_schedules
|
27
30
|
[
|
28
|
-
:pointstreak
|
31
|
+
:pointstreak,
|
32
|
+
:ezleagues
|
29
33
|
]
|
30
34
|
end
|
31
35
|
|
@@ -4,12 +4,62 @@ module ScheduleScraper
|
|
4
4
|
def export_fields
|
5
5
|
self.rules.keys
|
6
6
|
end
|
7
|
+
|
8
|
+
def cleaner; lambda { |value| value.text.strip }; end
|
7
9
|
end
|
8
10
|
|
9
11
|
def self.included(base)
|
10
12
|
base.extend ClassMethods
|
11
13
|
end
|
12
14
|
|
15
|
+
def title
|
16
|
+
"#{home_team} vs. #{away_team}"
|
17
|
+
end
|
18
|
+
|
19
|
+
def all_day?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
23
|
+
def description
|
24
|
+
title
|
25
|
+
end
|
26
|
+
|
27
|
+
def start_date
|
28
|
+
Date.parse(date).strftime(date_format)
|
29
|
+
end
|
30
|
+
|
31
|
+
def end_date
|
32
|
+
start_date
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_time
|
36
|
+
begin
|
37
|
+
Time.parse(time)
|
38
|
+
time
|
39
|
+
rescue
|
40
|
+
# looks like an invalid time
|
41
|
+
"12:00 PM"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# def end_time
|
46
|
+
# will default to one hour?
|
47
|
+
# end
|
48
|
+
|
49
|
+
def start_date_time
|
50
|
+
|
51
|
+
DateTime.strptime "#{start_date} #{start_time}", '%m/%d/%y %H:%M %P'
|
52
|
+
end
|
53
|
+
|
54
|
+
def end_date_time
|
55
|
+
# default to 1 hr
|
56
|
+
start_date_time.to_time + 3600
|
57
|
+
end
|
58
|
+
|
59
|
+
def private?
|
60
|
+
true
|
61
|
+
end
|
62
|
+
|
13
63
|
def to_csv
|
14
64
|
self.class.export_fields.collect do |field|
|
15
65
|
self.send(field)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ScheduleScraper
|
2
|
+
module EZLeagues
|
3
|
+
class Event < Nibbler
|
4
|
+
include ScheduleScraper::Event
|
5
|
+
|
6
|
+
element 'td:nth(2)' => :home_team, :with => cleaner
|
7
|
+
element 'td:nth(4)' => :away_team, :with => cleaner
|
8
|
+
element 'td:first a' => :date, :with => cleaner
|
9
|
+
element 'td:nth(5)' => :time, :with => cleaner
|
10
|
+
element 'td:nth(6)' => :rink, :with => cleaner
|
11
|
+
|
12
|
+
def date_format
|
13
|
+
"%m/%d/%y"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ScheduleScraper
|
2
|
+
module EZLeagues
|
3
|
+
class Schedule < Nibbler
|
4
|
+
include ScheduleScraper::Schedule
|
5
|
+
|
6
|
+
element 'table#ctl00_C_Schedule1_GridView1' => :list do
|
7
|
+
elements 'tr:not(.HeaderStyle)' => :event_list, :with => Event
|
8
|
+
end
|
9
|
+
|
10
|
+
def events
|
11
|
+
list.event_list #.reject { |event| event.away_team.nil? }
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def event_class
|
17
|
+
ScheduleScraper::EZLeagues::Event
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
@@ -3,53 +3,14 @@ module ScheduleScraper
|
|
3
3
|
class Event < Nibbler
|
4
4
|
include ScheduleScraper::Event
|
5
5
|
|
6
|
-
cleaner = lambda { |value| value.text.strip }
|
7
|
-
|
8
6
|
element 'td:first' => :home_team, :with => cleaner
|
9
7
|
element 'td:nth(2)' => :away_team, :with => cleaner
|
10
8
|
element 'td:nth(3)' => :date, :with => cleaner
|
11
9
|
element 'td:nth(4)' => :time, :with => cleaner
|
12
10
|
element 'td:nth(5)' => :rink, :with => cleaner
|
13
11
|
|
14
|
-
def title
|
15
|
-
"#{home_team} vs. #{away_team}"
|
16
|
-
end
|
17
|
-
|
18
|
-
def start_date
|
19
|
-
Date.parse(date).strftime("%m/%d/%y")
|
20
|
-
end
|
21
|
-
|
22
|
-
def end_date
|
23
|
-
start_date
|
24
|
-
end
|
25
|
-
|
26
|
-
def start_time
|
27
|
-
time
|
28
|
-
end
|
29
|
-
|
30
|
-
# def end_time
|
31
|
-
# will default to one hour?
|
32
|
-
# end
|
33
|
-
|
34
|
-
def start_date_time
|
35
|
-
DateTime.strptime "#{start_date} #{start_time}", '%m/%d/%y %H:%M %P'
|
36
|
-
end
|
37
|
-
|
38
|
-
def end_date_time
|
39
|
-
# default to 1 hr
|
40
|
-
start_date_time.to_time + 3600
|
41
|
-
end
|
42
|
-
|
43
|
-
def all_day?
|
44
|
-
false
|
45
|
-
end
|
46
|
-
|
47
|
-
def description
|
48
|
-
title
|
49
|
-
end
|
50
|
-
|
51
|
-
def private?
|
52
|
-
true
|
12
|
+
def date_format
|
13
|
+
"%m/%d/%y"
|
53
14
|
end
|
54
15
|
end
|
55
16
|
end
|
@@ -1,9 +1,98 @@
|
|
1
|
-
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
describe ScheduleScraper::Event do
|
4
|
+
subject() { EventTest.new }
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
let(:expected_values) {
|
7
|
+
{
|
8
|
+
:home_team => "home team",
|
9
|
+
:away_team => "away team",
|
10
|
+
:date => "01/01/2013",
|
11
|
+
:time => "09:00 PM"
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
describe "output helper methods" do
|
16
|
+
it "defines a title" do
|
17
|
+
expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
|
18
|
+
subject.title.must_equal expected
|
19
|
+
end
|
20
|
+
|
21
|
+
it "defines a start date" do
|
22
|
+
expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
|
23
|
+
subject.start_date.must_equal expected
|
24
|
+
end
|
25
|
+
|
26
|
+
it "defines an end date" do
|
27
|
+
subject.end_date.must_equal subject.start_date
|
28
|
+
end
|
29
|
+
|
30
|
+
it "defines a start time" do
|
31
|
+
subject.start_time.must_equal expected_values[:time]
|
32
|
+
end
|
33
|
+
|
34
|
+
it "defines a description" do
|
35
|
+
subject.description.must_equal subject.title
|
36
|
+
end
|
37
|
+
|
38
|
+
it "defines all day event" do
|
39
|
+
subject.all_day?.must_equal false
|
40
|
+
end
|
41
|
+
|
42
|
+
it "defines all private" do
|
43
|
+
subject.private?.must_equal true
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#to_gcal" do
|
48
|
+
it "provides an array ready to export to csv" do
|
49
|
+
expected = [
|
50
|
+
subject.title,
|
51
|
+
subject.start_date,
|
52
|
+
subject.start_time,
|
53
|
+
subject.end_date,
|
54
|
+
"",
|
55
|
+
false,
|
56
|
+
subject.description,
|
57
|
+
"",
|
58
|
+
true
|
59
|
+
]
|
60
|
+
|
61
|
+
subject.to_gcal.must_equal expected
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "#to_ical" do
|
66
|
+
it "provides an array ready to export to csv" do
|
67
|
+
local_subject = subject
|
68
|
+
|
69
|
+
RiCal.Calendar do |cal|
|
70
|
+
local_subject.to_ical(cal)
|
71
|
+
end.must_be_instance_of RiCal::Component::Calendar
|
72
|
+
end
|
73
|
+
|
74
|
+
it "handles invalid times" do
|
75
|
+
local_subject = subject
|
76
|
+
subject.time = "this is not a date"
|
77
|
+
|
78
|
+
RiCal.Calendar do |cal|
|
79
|
+
local_subject.to_ical(cal)
|
80
|
+
end.must_be_instance_of RiCal::Component::Calendar
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
describe "#to_h" do
|
85
|
+
it "provides a hash for export" do
|
86
|
+
expected = {
|
87
|
+
:title => subject.title,
|
88
|
+
:start_date => subject.start_date,
|
89
|
+
:start_time => subject.start_time,
|
90
|
+
:end_date => subject.end_date,
|
91
|
+
:all_day => subject.all_day?,
|
92
|
+
:description => subject.description
|
93
|
+
}
|
94
|
+
|
95
|
+
subject.to_h.must_equal expected
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
2
|
+
|
3
|
+
describe ScheduleScraper::EZLeagues::Event do
|
4
|
+
let(:options) { EZ_OPTIONS }
|
5
|
+
let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
|
6
|
+
let(:expected_values) do
|
7
|
+
{
|
8
|
+
:home_team => "007",
|
9
|
+
:away_team => "The Schwartz",
|
10
|
+
:date => "Tue-Sep 11",
|
11
|
+
:time => "8:55 PM",
|
12
|
+
:rink => "Pineville Ice House"
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
before do
|
17
|
+
VCR.use_cassette('schwartz_summer_2012') do
|
18
|
+
@schedule = ScheduleScraper::EZLeagues::Schedule.fetch(options)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
subject() { @schedule.events.last }
|
23
|
+
|
24
|
+
[:home_team, :away_team, :date, :time, :rink].each do |field|
|
25
|
+
it "can find the #{field.to_s.gsub('_', ' ')}" do
|
26
|
+
subject.send(field).wont_be_nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
it "uses elements to define fields for csv" do
|
31
|
+
klass = ScheduleScraper::EZLeagues::Event
|
32
|
+
klass.send(:export_fields).must_equal expected_values.keys
|
33
|
+
end
|
34
|
+
|
35
|
+
it "returns a list of fields when you ask for csv" do
|
36
|
+
subject.to_csv.must_equal expected_values.values
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
2
|
+
|
3
|
+
describe ScheduleScraper::EZLeagues::Schedule do
|
4
|
+
subject() { ScheduleScraper::EZLeagues::Schedule }
|
5
|
+
let(:options) { EZ_OPTIONS }
|
6
|
+
|
7
|
+
describe "schedule instance" do
|
8
|
+
subject() do
|
9
|
+
VCR.use_cassette('schwartz_summer_2012') do
|
10
|
+
ScheduleScraper::EZLeagues::Schedule.fetch(options)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it "returns an instance of itself" do
|
15
|
+
subject.must_be_instance_of ScheduleScraper::EZLeagues::Schedule
|
16
|
+
end
|
17
|
+
|
18
|
+
it "finds an event list" do
|
19
|
+
subject.list.wont_be_nil
|
20
|
+
end
|
21
|
+
|
22
|
+
it "has a list of events" do
|
23
|
+
subject.events.must_be_instance_of Array
|
24
|
+
subject.events.length.must_equal 13
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|