schedule-scraper 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +17 -2
- data/lib/schedule-scraper/event.rb +14 -0
- data/lib/schedule-scraper/pointstreak/event.rb +32 -0
- data/lib/schedule-scraper/pointstreak/schedule.rb +2 -12
- data/lib/schedule-scraper/schedule.rb +21 -0
- data/lib/schedule-scraper/version.rb +1 -1
- data/lib/schedule-scraper.rb +13 -2
- data/schedule-scraper.gemspec +1 -0
- data/spec/schedule-scraper/pointstreak/event_spec.rb +61 -3
- data/spec/schedule-scraper/pointstreak/schedule_spec.rb +5 -17
- data/spec/schedule-scraper/schedule_spec.rb +5 -7
- data/spec/schedule-scraper_spec.rb +7 -1
- data/spec/spec_helper.rb +2 -1
- metadata +25 -14
data/README.md
CHANGED
@@ -8,7 +8,8 @@ Supported schedule sites:
|
|
8
8
|
|
9
9
|
Supported output formats:
|
10
10
|
|
11
|
-
* CSV
|
11
|
+
* CSV (plain dump)
|
12
|
+
* Google Calendar formatted CSV
|
12
13
|
|
13
14
|
## Installation
|
14
15
|
|
@@ -26,14 +27,28 @@ Or install it yourself as:
|
|
26
27
|
|
27
28
|
## Usage
|
28
29
|
|
30
|
+
### Pointstreak Example
|
31
|
+
|
32
|
+
Locate the printable version of the schedule:
|
33
|
+
|
34
|
+
1. Visit the league's home page
|
35
|
+
2. Click on the team in question
|
36
|
+
3. Click the SCHEDULE link under TEAM MENU
|
37
|
+
4. Click PRINT THIS PAGE
|
38
|
+
|
29
39
|
Request a schedule:
|
30
40
|
|
31
|
-
|
41
|
+
url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
42
|
+
schedule = ScheduleScraper.fetch(:pointstreak, url)
|
32
43
|
|
33
44
|
Export the schedule to CSV:
|
34
45
|
|
35
46
|
schedule.to_csv
|
36
47
|
|
48
|
+
or
|
49
|
+
|
50
|
+
schedule.to_gcal
|
51
|
+
|
37
52
|
## TODO
|
38
53
|
|
39
54
|
1. Add more export options: iCal, Google Calendar (csv)
|
@@ -10,6 +10,38 @@ module ScheduleScraper
|
|
10
10
|
element 'td:nth(3)' => :date, :with => cleaner
|
11
11
|
element 'td:nth(4)' => :time, :with => cleaner
|
12
12
|
element 'td:nth(5)' => :rink, :with => cleaner
|
13
|
+
|
14
|
+
def title
|
15
|
+
"#{home_team} vs. #{away_team}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def start_date
|
19
|
+
Date.parse(date).strftime("%m/%d/%y")
|
20
|
+
end
|
21
|
+
|
22
|
+
def end_date
|
23
|
+
start_date
|
24
|
+
end
|
25
|
+
|
26
|
+
def start_time
|
27
|
+
time
|
28
|
+
end
|
29
|
+
|
30
|
+
# def end_time
|
31
|
+
# will default to one hour?
|
32
|
+
# end
|
33
|
+
|
34
|
+
def all_day?
|
35
|
+
false
|
36
|
+
end
|
37
|
+
|
38
|
+
def description
|
39
|
+
title
|
40
|
+
end
|
41
|
+
|
42
|
+
def private?
|
43
|
+
true
|
44
|
+
end
|
13
45
|
end
|
14
46
|
end
|
15
47
|
end
|
@@ -3,14 +3,12 @@ module ScheduleScraper
|
|
3
3
|
class Schedule < Nibbler
|
4
4
|
include ScheduleScraper::Schedule
|
5
5
|
|
6
|
-
POINT_STREAK_URL = "http://www.pointstreak.com/players/print/players-team-schedule.html"
|
7
|
-
|
8
6
|
element 'table table:last' => :list do
|
9
7
|
elements 'tr:not(.fields)' => :event_list, :with => Event
|
10
8
|
end
|
11
9
|
|
12
|
-
def self.fetch(
|
13
|
-
parse
|
10
|
+
def self.fetch(url)
|
11
|
+
parse open(url)
|
14
12
|
end
|
15
13
|
|
16
14
|
def events
|
@@ -19,14 +17,6 @@ module ScheduleScraper
|
|
19
17
|
|
20
18
|
private
|
21
19
|
|
22
|
-
def self.html(season, team)
|
23
|
-
open(source_url(season, team))
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.source_url(season, team)
|
27
|
-
"#{POINT_STREAK_URL}?teamid=#{team}&seasonid=#{season}"
|
28
|
-
end
|
29
|
-
|
30
20
|
def event_class
|
31
21
|
ScheduleScraper::Pointstreak::Event
|
32
22
|
end
|
@@ -15,5 +15,26 @@ module ScheduleScraper
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
end
|
18
|
+
|
19
|
+
def to_gcal
|
20
|
+
headers = [
|
21
|
+
"Subject",
|
22
|
+
"Start Date",
|
23
|
+
"Start Time",
|
24
|
+
"End Date",
|
25
|
+
"End Time",
|
26
|
+
"All Day Event",
|
27
|
+
"Description",
|
28
|
+
"Location",
|
29
|
+
"Private"
|
30
|
+
]
|
31
|
+
|
32
|
+
CSV.generate do |csv|
|
33
|
+
csv << headers
|
34
|
+
events.each do |event|
|
35
|
+
csv << event.to_gcal
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
18
39
|
end
|
19
40
|
end
|
data/lib/schedule-scraper.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'nibbler'
|
2
2
|
require 'open-uri'
|
3
3
|
require 'csv'
|
4
|
+
require 'uri'
|
5
|
+
require 'nokogiri'
|
4
6
|
require "schedule-scraper/version"
|
5
7
|
require "schedule-scraper/event"
|
6
8
|
require "schedule-scraper/schedule"
|
@@ -8,10 +10,11 @@ require "schedule-scraper/pointstreak/event"
|
|
8
10
|
require "schedule-scraper/pointstreak/schedule"
|
9
11
|
|
10
12
|
module ScheduleScraper
|
11
|
-
def self.fetch(type,
|
13
|
+
def self.fetch(type, url)
|
12
14
|
raise UnsupportedSchedule unless supported_schedules.include?(type.to_sym)
|
15
|
+
raise InvalidURL unless valid_url?(url)
|
13
16
|
|
14
|
-
type_class(type).fetch(
|
17
|
+
type_class(type).fetch(url)
|
15
18
|
end
|
16
19
|
|
17
20
|
def self.type_class(type)
|
@@ -26,5 +29,13 @@ module ScheduleScraper
|
|
26
29
|
]
|
27
30
|
end
|
28
31
|
|
32
|
+
def self.valid_url?(url)
|
33
|
+
uri = URI.parse(url)
|
34
|
+
uri.kind_of?(URI::HTTP)
|
35
|
+
rescue URI::InvalidURIError
|
36
|
+
false
|
37
|
+
end
|
38
|
+
|
29
39
|
class UnsupportedSchedule < StandardError; end
|
40
|
+
class InvalidURL < StandardError; end
|
30
41
|
end
|
data/schedule-scraper.gemspec
CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = ScheduleScraper::VERSION
|
17
17
|
|
18
18
|
gem.add_dependency 'nibbler', '~> 1.3.0'
|
19
|
+
gem.add_dependency 'nokogiri', '~> 1.5.4'
|
19
20
|
|
20
21
|
gem.add_development_dependency 'minitest', '~> 3.1.0'
|
21
22
|
gem.add_development_dependency 'rake', '~> 0.9.2'
|
@@ -3,6 +3,15 @@ require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
|
3
3
|
describe ScheduleScraper::Pointstreak::Event do
|
4
4
|
let(:options) { POINTSTREAK_OPTIONS }
|
5
5
|
let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
|
6
|
+
let(:expected_values) do
|
7
|
+
{
|
8
|
+
:home_team => "BLADES 6",
|
9
|
+
:away_team => "SUMMIT 8",
|
10
|
+
:date => "Sun, Jun 03",
|
11
|
+
:time => "7:45 pm",
|
12
|
+
:rink => "final"
|
13
|
+
}
|
14
|
+
end
|
6
15
|
|
7
16
|
before do
|
8
17
|
VCR.use_cassette('summit_summer_2012') do
|
@@ -20,11 +29,60 @@ describe ScheduleScraper::Pointstreak::Event do
|
|
20
29
|
|
21
30
|
it "uses elements to define fields for csv" do
|
22
31
|
klass = ScheduleScraper::Pointstreak::Event
|
23
|
-
klass.send(:export_fields).must_equal
|
32
|
+
klass.send(:export_fields).must_equal expected_values.keys
|
24
33
|
end
|
25
34
|
|
26
35
|
it "returns a list of fields when you ask for csv" do
|
27
|
-
|
28
|
-
|
36
|
+
subject.to_csv.must_equal expected_values.values
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "output helper methods" do
|
40
|
+
it "defines a title" do
|
41
|
+
expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
|
42
|
+
subject.title.must_equal expected
|
43
|
+
end
|
44
|
+
|
45
|
+
it "defines a start date" do
|
46
|
+
expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
|
47
|
+
subject.start_date.must_equal expected
|
48
|
+
end
|
49
|
+
|
50
|
+
it "defines an end date" do
|
51
|
+
subject.end_date.must_equal subject.start_date
|
52
|
+
end
|
53
|
+
|
54
|
+
it "defines a start time" do
|
55
|
+
subject.start_time.must_equal expected_values[:time]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "defines a description" do
|
59
|
+
subject.description.must_equal subject.title
|
60
|
+
end
|
61
|
+
|
62
|
+
it "defines all day event" do
|
63
|
+
subject.all_day?.must_equal false
|
64
|
+
end
|
65
|
+
|
66
|
+
it "defines all private" do
|
67
|
+
subject.private?.must_equal true
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "#to_gcal" do
|
72
|
+
it "provides an array ready to export to csv" do
|
73
|
+
expected = [
|
74
|
+
subject.title,
|
75
|
+
subject.start_date,
|
76
|
+
subject.start_time,
|
77
|
+
subject.end_date,
|
78
|
+
"",
|
79
|
+
false,
|
80
|
+
subject.description,
|
81
|
+
"",
|
82
|
+
true
|
83
|
+
]
|
84
|
+
|
85
|
+
subject.to_gcal.must_equal expected
|
86
|
+
end
|
29
87
|
end
|
30
88
|
end
|
@@ -4,23 +4,6 @@ describe ScheduleScraper::Pointstreak::Schedule do
|
|
4
4
|
subject() { ScheduleScraper::Pointstreak::Schedule }
|
5
5
|
let(:options) { POINTSTREAK_OPTIONS }
|
6
6
|
|
7
|
-
it "knows the root pointstreak url" do
|
8
|
-
subject::POINT_STREAK_URL.must_match /pointstreak/
|
9
|
-
end
|
10
|
-
|
11
|
-
it "builds a valid url" do
|
12
|
-
url = subject.send(:source_url, "123", "456")
|
13
|
-
expected = "#{subject::POINT_STREAK_URL}?teamid=456&seasonid=123"
|
14
|
-
|
15
|
-
url.must_equal expected
|
16
|
-
end
|
17
|
-
|
18
|
-
it "fetches html from pointstreak" do
|
19
|
-
VCR.use_cassette('summit_summer_2012') do
|
20
|
-
subject.html(options[:season], options[:team])
|
21
|
-
end # wont_raise
|
22
|
-
end
|
23
|
-
|
24
7
|
describe "schedule instance" do
|
25
8
|
subject() do
|
26
9
|
VCR.use_cassette('summit_summer_2012') do
|
@@ -44,5 +27,10 @@ describe ScheduleScraper::Pointstreak::Schedule do
|
|
44
27
|
it "generates a csv file" do
|
45
28
|
subject.to_csv.must_be_instance_of String
|
46
29
|
end
|
30
|
+
|
31
|
+
it "generates a google calendar formatted csv" do
|
32
|
+
p subject.to_gcal
|
33
|
+
subject.to_gcal.must_be_instance_of String
|
34
|
+
end
|
47
35
|
end
|
48
36
|
end
|
@@ -1,11 +1,9 @@
|
|
1
1
|
# require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# class ScraperTest
|
4
|
+
# include ScheduleScraper::Schedule
|
5
|
+
# end
|
5
6
|
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# subject.to_csv.must_be_instance_of String
|
9
|
-
# end
|
10
|
-
# end
|
7
|
+
# describe ScheduleScraper::Schedule do
|
8
|
+
# subject() { ScraperTest.new(:xyz, :url => "http://www.xyz.com") }
|
11
9
|
# end
|
@@ -11,10 +11,16 @@ describe ScheduleScraper do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
-> {
|
14
|
-
ScheduleScraper.fetch(:xyz)
|
14
|
+
ScheduleScraper.fetch(:xyz, options)
|
15
15
|
}.must_raise ScheduleScraper::UnsupportedSchedule
|
16
16
|
end
|
17
17
|
|
18
|
+
it "validates the url" do
|
19
|
+
-> {
|
20
|
+
ScheduleScraper.fetch(:pointstreak, "abc")
|
21
|
+
}.must_raise ScheduleScraper::InvalidURL
|
22
|
+
end
|
23
|
+
|
18
24
|
it "returns a schedule" do
|
19
25
|
VCR.use_cassette('summit_summer_2012') do
|
20
26
|
schedule = ScheduleScraper.fetch(:pointstreak, options)
|
data/spec/spec_helper.rb
CHANGED
@@ -5,7 +5,8 @@ require 'vcr'
|
|
5
5
|
|
6
6
|
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/schedule-scraper'))
|
7
7
|
|
8
|
-
POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
|
8
|
+
# POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
|
9
|
+
POINTSTREAK_OPTIONS = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
9
10
|
|
10
11
|
VCR.configure do |c|
|
11
12
|
c.cassette_library_dir = 'spec/vcr_cassettes'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: schedule-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nibbler
|
16
|
-
requirement: &
|
16
|
+
requirement: &70168497621260 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: 1.3.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70168497621260
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: nokogiri
|
27
|
+
requirement: &70168497620760 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.4
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70168497620760
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: minitest
|
27
|
-
requirement: &
|
38
|
+
requirement: &70168497620300 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: 3.1.0
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *70168497620300
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: rake
|
38
|
-
requirement: &
|
49
|
+
requirement: &70168497619820 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ~>
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: 0.9.2
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *70168497619820
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: turn
|
49
|
-
requirement: &
|
60
|
+
requirement: &70168497619320 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ~>
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: 0.9.5
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *70168497619320
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: vcr
|
60
|
-
requirement: &
|
71
|
+
requirement: &70168497635140 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ~>
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 2.2.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *70168497635140
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: fakeweb
|
71
|
-
requirement: &
|
82
|
+
requirement: &70168497634600 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ~>
|
@@ -76,7 +87,7 @@ dependencies:
|
|
76
87
|
version: 1.3.0
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *70168497634600
|
80
91
|
description: Scrapes online schedules and provides portable versions
|
81
92
|
email:
|
82
93
|
- john@threedogconsulting.com
|