schedule-scraper 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +13 -5
- data/lib/schedule-scraper.rb +5 -1
- data/lib/schedule-scraper/event.rb +50 -0
- data/lib/schedule-scraper/ezleagues/event.rb +17 -0
- data/lib/schedule-scraper/ezleagues/schedule.rb +22 -0
- data/lib/schedule-scraper/pointstreak/event.rb +2 -41
- data/lib/schedule-scraper/pointstreak/schedule.rb +0 -4
- data/lib/schedule-scraper/schedule.rb +3 -0
- data/lib/schedule-scraper/version.rb +1 -1
- data/spec/schedule-scraper/event_spec.rb +96 -7
- data/spec/schedule-scraper/ezleagues/event_spec.rb +39 -0
- data/spec/schedule-scraper/ezleagues/schedule_spec.rb +29 -0
- data/spec/schedule-scraper/pointstreak/event_spec.rb +0 -75
- data/spec/schedule-scraper/pointstreak/schedule_spec.rb +0 -16
- data/spec/schedule-scraper/schedule_spec.rb +22 -7
- data/spec/spec_helper.rb +32 -1
- data/spec/vcr_cassettes/schwartz_summer_2012.yml +1150 -0
- metadata +10 -2
data/README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
# ScheduleScraper
|
2
2
|
|
3
3
|
[](http://travis-ci.org/johnallen3d/schedule-scrape)
|
4
|
+
[](https://codeclimate.com/github/johnallen3d/schedule-scrape)
|
4
5
|
|
5
6
|
A web calendar scraper for sites that do not provide a portable (csv, i-cal, etc.) version.
|
6
7
|
|
7
8
|
Supported schedule sites:
|
8
9
|
|
9
10
|
* [PointStreak](http://pointstreak.com)
|
11
|
+
* [EZFacility](http://www.ezfacility.com/)
|
10
12
|
|
11
13
|
Supported output formats:
|
12
14
|
|
@@ -31,8 +33,6 @@ Or install it yourself as:
|
|
31
33
|
|
32
34
|
## Usage
|
33
35
|
|
34
|
-
### Pointstreak Example
|
35
|
-
|
36
36
|
Locate the printable version of the schedule:
|
37
37
|
|
38
38
|
1. Visit the leagues home page
|
@@ -43,7 +43,15 @@ Locate the printable version of the scheulde:
|
|
43
43
|
Request a schedule:
|
44
44
|
|
45
45
|
url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
46
|
-
|
46
|
+
schedule = ScheduleScraper.fetch(:pointstreak, url)
|
47
|
+
|
48
|
+
or
|
49
|
+
|
50
|
+
url = "http://pinevilleice.ezleagues.ezfacility.com/teams/1026121/The-Schwartz.aspx"
|
51
|
+
schedule = ScheduleScraper.fetch(:ezleagues, url)
|
52
|
+
|
53
|
+
then
|
54
|
+
|
47
55
|
|
48
56
|
Export the schedule:
|
49
57
|
|
@@ -59,11 +67,11 @@ or
|
|
59
67
|
|
60
68
|
## TODO
|
61
69
|
|
62
|
-
1. Add other schedule types
|
70
|
+
1. Add other schedule types
|
63
71
|
|
64
72
|
## Why?
|
65
73
|
|
66
|
-
To scratch an itch. I play on a couple of ice hockey teams and the rinks these sites to manage leagues and schedules. These sites do not offer any options for exporing and I got tired of updating my schedule manually every couple of months.
|
74
|
+
To scratch an itch. I play on a couple of ice hockey teams and the rinks use these sites to manage leagues and schedules. These sites do not offer any options for exporting and I got tired of updating my schedule manually every couple of months.
|
67
75
|
|
68
76
|
## Contributing
|
69
77
|
|
data/lib/schedule-scraper.rb
CHANGED
@@ -8,6 +8,8 @@ require "schedule-scraper/event"
|
|
8
8
|
require "schedule-scraper/schedule"
|
9
9
|
require "schedule-scraper/pointstreak/event"
|
10
10
|
require "schedule-scraper/pointstreak/schedule"
|
11
|
+
require "schedule-scraper/ezleagues/event"
|
12
|
+
require "schedule-scraper/ezleagues/schedule"
|
11
13
|
|
12
14
|
module ScheduleScraper
|
13
15
|
def self.fetch(type, url)
|
@@ -20,12 +22,14 @@ module ScheduleScraper
|
|
20
22
|
def self.type_class(type)
|
21
23
|
case type
|
22
24
|
when :pointstreak then Pointstreak::Schedule
|
25
|
+
when :ezleagues then EZLeagues::Schedule
|
23
26
|
end
|
24
27
|
end
|
25
28
|
|
26
29
|
def self.supported_schedules
|
27
30
|
[
|
28
|
-
:pointstreak
|
31
|
+
:pointstreak,
|
32
|
+
:ezleagues
|
29
33
|
]
|
30
34
|
end
|
31
35
|
|
@@ -4,12 +4,62 @@ module ScheduleScraper
|
|
4
4
|
def export_fields
|
5
5
|
self.rules.keys
|
6
6
|
end
|
7
|
+
|
8
|
+
def cleaner; lambda { |value| value.text.strip }; end
|
7
9
|
end
|
8
10
|
|
9
11
|
def self.included(base)
|
10
12
|
base.extend ClassMethods
|
11
13
|
end
|
12
14
|
|
15
|
+
def title
|
16
|
+
"#{home_team} vs. #{away_team}"
|
17
|
+
end
|
18
|
+
|
19
|
+
def all_day?
|
20
|
+
false
|
21
|
+
end
|
22
|
+
|
23
|
+
def description
|
24
|
+
title
|
25
|
+
end
|
26
|
+
|
27
|
+
def start_date
|
28
|
+
Date.parse(date).strftime(date_format)
|
29
|
+
end
|
30
|
+
|
31
|
+
def end_date
|
32
|
+
start_date
|
33
|
+
end
|
34
|
+
|
35
|
+
def start_time
|
36
|
+
begin
|
37
|
+
Time.parse(time)
|
38
|
+
time
|
39
|
+
rescue
|
40
|
+
# looks like an invalid time
|
41
|
+
"12:00 PM"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
# def end_time
|
46
|
+
# will default to one hour?
|
47
|
+
# end
|
48
|
+
|
49
|
+
def start_date_time
|
50
|
+
|
51
|
+
DateTime.strptime "#{start_date} #{start_time}", '%m/%d/%y %H:%M %P'
|
52
|
+
end
|
53
|
+
|
54
|
+
def end_date_time
|
55
|
+
# default to 1 hr
|
56
|
+
start_date_time.to_time + 3600
|
57
|
+
end
|
58
|
+
|
59
|
+
def private?
|
60
|
+
true
|
61
|
+
end
|
62
|
+
|
13
63
|
def to_csv
|
14
64
|
self.class.export_fields.collect do |field|
|
15
65
|
self.send(field)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module ScheduleScraper
|
2
|
+
module EZLeagues
|
3
|
+
class Event < Nibbler
|
4
|
+
include ScheduleScraper::Event
|
5
|
+
|
6
|
+
element 'td:nth(2)' => :home_team, :with => cleaner
|
7
|
+
element 'td:nth(4)' => :away_team, :with => cleaner
|
8
|
+
element 'td:first a' => :date, :with => cleaner
|
9
|
+
element 'td:nth(5)' => :time, :with => cleaner
|
10
|
+
element 'td:nth(6)' => :rink, :with => cleaner
|
11
|
+
|
12
|
+
def date_format
|
13
|
+
"%m/%d/%y"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module ScheduleScraper
|
2
|
+
module EZLeagues
|
3
|
+
class Schedule < Nibbler
|
4
|
+
include ScheduleScraper::Schedule
|
5
|
+
|
6
|
+
element 'table#ctl00_C_Schedule1_GridView1' => :list do
|
7
|
+
elements 'tr:not(.HeaderStyle)' => :event_list, :with => Event
|
8
|
+
end
|
9
|
+
|
10
|
+
def events
|
11
|
+
list.event_list #.reject { |event| event.away_team.nil? }
|
12
|
+
end
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
def event_class
|
17
|
+
ScheduleScraper::EZLeagues::Event
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
@@ -3,53 +3,14 @@ module ScheduleScraper
|
|
3
3
|
class Event < Nibbler
|
4
4
|
include ScheduleScraper::Event
|
5
5
|
|
6
|
-
cleaner = lambda { |value| value.text.strip }
|
7
|
-
|
8
6
|
element 'td:first' => :home_team, :with => cleaner
|
9
7
|
element 'td:nth(2)' => :away_team, :with => cleaner
|
10
8
|
element 'td:nth(3)' => :date, :with => cleaner
|
11
9
|
element 'td:nth(4)' => :time, :with => cleaner
|
12
10
|
element 'td:nth(5)' => :rink, :with => cleaner
|
13
11
|
|
14
|
-
def
|
15
|
-
"
|
16
|
-
end
|
17
|
-
|
18
|
-
def start_date
|
19
|
-
Date.parse(date).strftime("%m/%d/%y")
|
20
|
-
end
|
21
|
-
|
22
|
-
def end_date
|
23
|
-
start_date
|
24
|
-
end
|
25
|
-
|
26
|
-
def start_time
|
27
|
-
time
|
28
|
-
end
|
29
|
-
|
30
|
-
# def end_time
|
31
|
-
# will default to one hour?
|
32
|
-
# end
|
33
|
-
|
34
|
-
def start_date_time
|
35
|
-
DateTime.strptime "#{start_date} #{start_time}", '%m/%d/%y %H:%M %P'
|
36
|
-
end
|
37
|
-
|
38
|
-
def end_date_time
|
39
|
-
# default to 1 hr
|
40
|
-
start_date_time.to_time + 3600
|
41
|
-
end
|
42
|
-
|
43
|
-
def all_day?
|
44
|
-
false
|
45
|
-
end
|
46
|
-
|
47
|
-
def description
|
48
|
-
title
|
49
|
-
end
|
50
|
-
|
51
|
-
def private?
|
52
|
-
true
|
12
|
+
def date_format
|
13
|
+
"%m/%d/%y"
|
53
14
|
end
|
54
15
|
end
|
55
16
|
end
|
@@ -1,9 +1,98 @@
|
|
1
|
-
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
describe ScheduleScraper::Event do
|
4
|
+
subject() { EventTest.new }
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
6
|
+
let(:expected_values) {
|
7
|
+
{
|
8
|
+
:home_team => "home team",
|
9
|
+
:away_team => "away team",
|
10
|
+
:date => "01/01/2013",
|
11
|
+
:time => "09:00 PM"
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
describe "output helper methods" do
|
16
|
+
it "defines a title" do
|
17
|
+
expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
|
18
|
+
subject.title.must_equal expected
|
19
|
+
end
|
20
|
+
|
21
|
+
it "defines a start date" do
|
22
|
+
expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
|
23
|
+
subject.start_date.must_equal expected
|
24
|
+
end
|
25
|
+
|
26
|
+
it "defines an end date" do
|
27
|
+
subject.end_date.must_equal subject.start_date
|
28
|
+
end
|
29
|
+
|
30
|
+
it "defines a start time" do
|
31
|
+
subject.start_time.must_equal expected_values[:time]
|
32
|
+
end
|
33
|
+
|
34
|
+
it "defines a description" do
|
35
|
+
subject.description.must_equal subject.title
|
36
|
+
end
|
37
|
+
|
38
|
+
it "defines all day event" do
|
39
|
+
subject.all_day?.must_equal false
|
40
|
+
end
|
41
|
+
|
42
|
+
it "defines all private" do
|
43
|
+
subject.private?.must_equal true
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#to_gcal" do
|
48
|
+
it "provides an array ready to export to csv" do
|
49
|
+
expected = [
|
50
|
+
subject.title,
|
51
|
+
subject.start_date,
|
52
|
+
subject.start_time,
|
53
|
+
subject.end_date,
|
54
|
+
"",
|
55
|
+
false,
|
56
|
+
subject.description,
|
57
|
+
"",
|
58
|
+
true
|
59
|
+
]
|
60
|
+
|
61
|
+
subject.to_gcal.must_equal expected
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
describe "#to_ical" do
|
66
|
+
it "provides an array ready to export to csv" do
|
67
|
+
local_subject = subject
|
68
|
+
|
69
|
+
RiCal.Calendar do |cal|
|
70
|
+
local_subject.to_ical(cal)
|
71
|
+
end.must_be_instance_of RiCal::Component::Calendar
|
72
|
+
end
|
73
|
+
|
74
|
+
it "handles invalid times" do
|
75
|
+
local_subject = subject
|
76
|
+
subject.time = "this is not a date"
|
77
|
+
|
78
|
+
RiCal.Calendar do |cal|
|
79
|
+
local_subject.to_ical(cal)
|
80
|
+
end.must_be_instance_of RiCal::Component::Calendar
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
describe "#to_h" do
|
85
|
+
it "provides a hash for export" do
|
86
|
+
expected = {
|
87
|
+
:title => subject.title,
|
88
|
+
:start_date => subject.start_date,
|
89
|
+
:start_time => subject.start_time,
|
90
|
+
:end_date => subject.end_date,
|
91
|
+
:all_day => subject.all_day?,
|
92
|
+
:description => subject.description
|
93
|
+
}
|
94
|
+
|
95
|
+
subject.to_h.must_equal expected
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
2
|
+
|
3
|
+
describe ScheduleScraper::EZLeagues::Event do
|
4
|
+
let(:options) { EZ_OPTIONS }
|
5
|
+
let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
|
6
|
+
let(:expected_values) do
|
7
|
+
{
|
8
|
+
:home_team => "007",
|
9
|
+
:away_team => "The Schwartz",
|
10
|
+
:date => "Tue-Sep 11",
|
11
|
+
:time => "8:55 PM",
|
12
|
+
:rink => "Pineville Ice House"
|
13
|
+
}
|
14
|
+
end
|
15
|
+
|
16
|
+
before do
|
17
|
+
VCR.use_cassette('schwartz_summer_2012') do
|
18
|
+
@schedule = ScheduleScraper::EZLeagues::Schedule.fetch(options)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
subject() { @schedule.events.last }
|
23
|
+
|
24
|
+
[:home_team, :away_team, :date, :time, :rink].each do |field|
|
25
|
+
it "can find the #{field.to_s.gsub('_', ' ')}" do
|
26
|
+
subject.send(field).wont_be_nil
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
it "uses elements to define fields for csv" do
|
31
|
+
klass = ScheduleScraper::EZLeagues::Event
|
32
|
+
klass.send(:export_fields).must_equal expected_values.keys
|
33
|
+
end
|
34
|
+
|
35
|
+
it "returns a list of fields when you ask for csv" do
|
36
|
+
subject.to_csv.must_equal expected_values.values
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
2
|
+
|
3
|
+
describe ScheduleScraper::EZLeagues::Schedule do
|
4
|
+
subject() { ScheduleScraper::EZLeagues::Schedule }
|
5
|
+
let(:options) { EZ_OPTIONS }
|
6
|
+
|
7
|
+
describe "schedule instance" do
|
8
|
+
subject() do
|
9
|
+
VCR.use_cassette('schwartz_summer_2012') do
|
10
|
+
ScheduleScraper::EZLeagues::Schedule.fetch(options)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
it "returns an instance of itself" do
|
15
|
+
subject.must_be_instance_of ScheduleScraper::EZLeagues::Schedule
|
16
|
+
end
|
17
|
+
|
18
|
+
it "finds an event list" do
|
19
|
+
subject.list.wont_be_nil
|
20
|
+
end
|
21
|
+
|
22
|
+
it "has a list of events" do
|
23
|
+
subject.events.must_be_instance_of Array
|
24
|
+
subject.events.length.must_equal 13
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
|