schedule-scraper 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +17 -2
- data/lib/schedule-scraper/event.rb +14 -0
- data/lib/schedule-scraper/pointstreak/event.rb +32 -0
- data/lib/schedule-scraper/pointstreak/schedule.rb +2 -12
- data/lib/schedule-scraper/schedule.rb +21 -0
- data/lib/schedule-scraper/version.rb +1 -1
- data/lib/schedule-scraper.rb +13 -2
- data/schedule-scraper.gemspec +1 -0
- data/spec/schedule-scraper/pointstreak/event_spec.rb +61 -3
- data/spec/schedule-scraper/pointstreak/schedule_spec.rb +5 -17
- data/spec/schedule-scraper/schedule_spec.rb +5 -7
- data/spec/schedule-scraper_spec.rb +7 -1
- data/spec/spec_helper.rb +2 -1
- metadata +25 -14
data/README.md
CHANGED
@@ -8,7 +8,8 @@ Supported schedule sites:
|
|
8
8
|
|
9
9
|
Supported output formats:
|
10
10
|
|
11
|
-
* CSV
|
11
|
+
* CSV (plain dump)
|
12
|
+
* Google Calendar formatted CSV
|
12
13
|
|
13
14
|
## Installation
|
14
15
|
|
@@ -26,14 +27,28 @@ Or install it yourself as:
|
|
26
27
|
|
27
28
|
## Usage
|
28
29
|
|
30
|
+
### Pointstreak Example
|
31
|
+
|
32
|
+
Locate the printable version of the schedule:
|
33
|
+
|
34
|
+
1. Visit the league's home page
|
35
|
+
2. Click on the team in question
|
36
|
+
3. Click the SCHEDULE link under TEAM MENU
|
37
|
+
4. Click PRINT THIS PAGE
|
38
|
+
|
29
39
|
Request a schedule:
|
30
40
|
|
31
|
-
|
41
|
+
url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
42
|
+
schedule = ScheduleScraper.fetch(:pointstreak, url)
|
32
43
|
|
33
44
|
Export the schedule to CSV:
|
34
45
|
|
35
46
|
schedule.to_csv
|
36
47
|
|
48
|
+
or
|
49
|
+
|
50
|
+
schedule.to_gcal
|
51
|
+
|
37
52
|
## TODO
|
38
53
|
|
39
54
|
1. Add more export options: iCal, Google Calendar (csv)
|
@@ -10,6 +10,38 @@ module ScheduleScraper
|
|
10
10
|
element 'td:nth(3)' => :date, :with => cleaner
|
11
11
|
element 'td:nth(4)' => :time, :with => cleaner
|
12
12
|
element 'td:nth(5)' => :rink, :with => cleaner
|
13
|
+
|
14
|
+
def title
|
15
|
+
"#{home_team} vs. #{away_team}"
|
16
|
+
end
|
17
|
+
|
18
|
+
def start_date
|
19
|
+
Date.parse(date).strftime("%m/%d/%y")
|
20
|
+
end
|
21
|
+
|
22
|
+
def end_date
|
23
|
+
start_date
|
24
|
+
end
|
25
|
+
|
26
|
+
def start_time
|
27
|
+
time
|
28
|
+
end
|
29
|
+
|
30
|
+
# def end_time
|
31
|
+
# will default to one hour?
|
32
|
+
# end
|
33
|
+
|
34
|
+
def all_day?
|
35
|
+
false
|
36
|
+
end
|
37
|
+
|
38
|
+
def description
|
39
|
+
title
|
40
|
+
end
|
41
|
+
|
42
|
+
def private?
|
43
|
+
true
|
44
|
+
end
|
13
45
|
end
|
14
46
|
end
|
15
47
|
end
|
@@ -3,14 +3,12 @@ module ScheduleScraper
|
|
3
3
|
class Schedule < Nibbler
|
4
4
|
include ScheduleScraper::Schedule
|
5
5
|
|
6
|
-
POINT_STREAK_URL = "http://www.pointstreak.com/players/print/players-team-schedule.html"
|
7
|
-
|
8
6
|
element 'table table:last' => :list do
|
9
7
|
elements 'tr:not(.fields)' => :event_list, :with => Event
|
10
8
|
end
|
11
9
|
|
12
|
-
def self.fetch(
|
13
|
-
parse
|
10
|
+
def self.fetch(url)
|
11
|
+
parse open(url)
|
14
12
|
end
|
15
13
|
|
16
14
|
def events
|
@@ -19,14 +17,6 @@ module ScheduleScraper
|
|
19
17
|
|
20
18
|
private
|
21
19
|
|
22
|
-
def self.html(season, team)
|
23
|
-
open(source_url(season, team))
|
24
|
-
end
|
25
|
-
|
26
|
-
def self.source_url(season, team)
|
27
|
-
"#{POINT_STREAK_URL}?teamid=#{team}&seasonid=#{season}"
|
28
|
-
end
|
29
|
-
|
30
20
|
def event_class
|
31
21
|
ScheduleScraper::Pointstreak::Event
|
32
22
|
end
|
@@ -15,5 +15,26 @@ module ScheduleScraper
|
|
15
15
|
end
|
16
16
|
end
|
17
17
|
end
|
18
|
+
|
19
|
+
def to_gcal
|
20
|
+
headers = [
|
21
|
+
"Subject",
|
22
|
+
"Start Date",
|
23
|
+
"Start Time",
|
24
|
+
"End Date",
|
25
|
+
"End Time",
|
26
|
+
"All Day Event",
|
27
|
+
"Description",
|
28
|
+
"Location",
|
29
|
+
"Private"
|
30
|
+
]
|
31
|
+
|
32
|
+
CSV.generate do |csv|
|
33
|
+
csv << headers
|
34
|
+
events.each do |event|
|
35
|
+
csv << event.to_gcal
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
18
39
|
end
|
19
40
|
end
|
data/lib/schedule-scraper.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'nibbler'
|
2
2
|
require 'open-uri'
|
3
3
|
require 'csv'
|
4
|
+
require 'uri'
|
5
|
+
require 'nokogiri'
|
4
6
|
require "schedule-scraper/version"
|
5
7
|
require "schedule-scraper/event"
|
6
8
|
require "schedule-scraper/schedule"
|
@@ -8,10 +10,11 @@ require "schedule-scraper/pointstreak/event"
|
|
8
10
|
require "schedule-scraper/pointstreak/schedule"
|
9
11
|
|
10
12
|
module ScheduleScraper
|
11
|
-
def self.fetch(type,
|
13
|
+
def self.fetch(type, url)
|
12
14
|
raise UnsupportedSchedule unless supported_schedules.include?(type.to_sym)
|
15
|
+
raise InvalidURL unless valid_url?(url)
|
13
16
|
|
14
|
-
type_class(type).fetch(
|
17
|
+
type_class(type).fetch(url)
|
15
18
|
end
|
16
19
|
|
17
20
|
def self.type_class(type)
|
@@ -26,5 +29,13 @@ module ScheduleScraper
|
|
26
29
|
]
|
27
30
|
end
|
28
31
|
|
32
|
+
def self.valid_url?(url)
|
33
|
+
uri = URI.parse(url)
|
34
|
+
uri.kind_of?(URI::HTTP)
|
35
|
+
rescue URI::InvalidURIError
|
36
|
+
false
|
37
|
+
end
|
38
|
+
|
29
39
|
class UnsupportedSchedule < StandardError; end
|
40
|
+
class InvalidURL < StandardError; end
|
30
41
|
end
|
data/schedule-scraper.gemspec
CHANGED
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.version = ScheduleScraper::VERSION
|
17
17
|
|
18
18
|
gem.add_dependency 'nibbler', '~> 1.3.0'
|
19
|
+
gem.add_dependency 'nokogiri', '~> 1.5.4'
|
19
20
|
|
20
21
|
gem.add_development_dependency 'minitest', '~> 3.1.0'
|
21
22
|
gem.add_development_dependency 'rake', '~> 0.9.2'
|
@@ -3,6 +3,15 @@ require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
|
|
3
3
|
describe ScheduleScraper::Pointstreak::Event do
|
4
4
|
let(:options) { POINTSTREAK_OPTIONS }
|
5
5
|
let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
|
6
|
+
let(:expected_values) do
|
7
|
+
{
|
8
|
+
:home_team => "BLADES 6",
|
9
|
+
:away_team => "SUMMIT 8",
|
10
|
+
:date => "Sun, Jun 03",
|
11
|
+
:time => "7:45 pm",
|
12
|
+
:rink => "final"
|
13
|
+
}
|
14
|
+
end
|
6
15
|
|
7
16
|
before do
|
8
17
|
VCR.use_cassette('summit_summer_2012') do
|
@@ -20,11 +29,60 @@ describe ScheduleScraper::Pointstreak::Event do
|
|
20
29
|
|
21
30
|
it "uses elements to define fields for csv" do
|
22
31
|
klass = ScheduleScraper::Pointstreak::Event
|
23
|
-
klass.send(:export_fields).must_equal
|
32
|
+
klass.send(:export_fields).must_equal expected_values.keys
|
24
33
|
end
|
25
34
|
|
26
35
|
it "returns a list of fields when you ask for csv" do
|
27
|
-
|
28
|
-
|
36
|
+
subject.to_csv.must_equal expected_values.values
|
37
|
+
end
|
38
|
+
|
39
|
+
describe "output helper methods" do
|
40
|
+
it "defines a title" do
|
41
|
+
expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
|
42
|
+
subject.title.must_equal expected
|
43
|
+
end
|
44
|
+
|
45
|
+
it "defines a start date" do
|
46
|
+
expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
|
47
|
+
subject.start_date.must_equal expected
|
48
|
+
end
|
49
|
+
|
50
|
+
it "defines an end date" do
|
51
|
+
subject.end_date.must_equal subject.start_date
|
52
|
+
end
|
53
|
+
|
54
|
+
it "defines a start time" do
|
55
|
+
subject.start_time.must_equal expected_values[:time]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "defines a description" do
|
59
|
+
subject.description.must_equal subject.title
|
60
|
+
end
|
61
|
+
|
62
|
+
it "defines all day event" do
|
63
|
+
subject.all_day?.must_equal false
|
64
|
+
end
|
65
|
+
|
66
|
+
it "defines all private" do
|
67
|
+
subject.private?.must_equal true
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe "#to_gcal" do
|
72
|
+
it "provides an array ready to export to csv" do
|
73
|
+
expected = [
|
74
|
+
subject.title,
|
75
|
+
subject.start_date,
|
76
|
+
subject.start_time,
|
77
|
+
subject.end_date,
|
78
|
+
"",
|
79
|
+
false,
|
80
|
+
subject.description,
|
81
|
+
"",
|
82
|
+
true
|
83
|
+
]
|
84
|
+
|
85
|
+
subject.to_gcal.must_equal expected
|
86
|
+
end
|
29
87
|
end
|
30
88
|
end
|
@@ -4,23 +4,6 @@ describe ScheduleScraper::Pointstreak::Schedule do
|
|
4
4
|
subject() { ScheduleScraper::Pointstreak::Schedule }
|
5
5
|
let(:options) { POINTSTREAK_OPTIONS }
|
6
6
|
|
7
|
-
it "knows the root pointstreak url" do
|
8
|
-
subject::POINT_STREAK_URL.must_match /pointstreak/
|
9
|
-
end
|
10
|
-
|
11
|
-
it "builds a valid url" do
|
12
|
-
url = subject.send(:source_url, "123", "456")
|
13
|
-
expected = "#{subject::POINT_STREAK_URL}?teamid=456&seasonid=123"
|
14
|
-
|
15
|
-
url.must_equal expected
|
16
|
-
end
|
17
|
-
|
18
|
-
it "fetches html from pointstreak" do
|
19
|
-
VCR.use_cassette('summit_summer_2012') do
|
20
|
-
subject.html(options[:season], options[:team])
|
21
|
-
end # wont_raise
|
22
|
-
end
|
23
|
-
|
24
7
|
describe "schedule instance" do
|
25
8
|
subject() do
|
26
9
|
VCR.use_cassette('summit_summer_2012') do
|
@@ -44,5 +27,10 @@ describe ScheduleScraper::Pointstreak::Schedule do
|
|
44
27
|
it "generates a csv file" do
|
45
28
|
subject.to_csv.must_be_instance_of String
|
46
29
|
end
|
30
|
+
|
31
|
+
it "generates a google calendar formatted csv" do
|
32
|
+
p subject.to_gcal
|
33
|
+
subject.to_gcal.must_be_instance_of String
|
34
|
+
end
|
47
35
|
end
|
48
36
|
end
|
@@ -1,11 +1,9 @@
|
|
1
1
|
# require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# class ScraperTest
|
4
|
+
# include ScheduleScraper::Schedule
|
5
|
+
# end
|
5
6
|
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# subject.to_csv.must_be_instance_of String
|
9
|
-
# end
|
10
|
-
# end
|
7
|
+
# describe ScheduleScraper::Schedule do
|
8
|
+
# subject() { ScraperTest.new(:xyz, :url => "http://www.xyz.com") }
|
11
9
|
# end
|
@@ -11,10 +11,16 @@ describe ScheduleScraper do
|
|
11
11
|
end
|
12
12
|
|
13
13
|
-> {
|
14
|
-
ScheduleScraper.fetch(:xyz)
|
14
|
+
ScheduleScraper.fetch(:xyz, options)
|
15
15
|
}.must_raise ScheduleScraper::UnsupportedSchedule
|
16
16
|
end
|
17
17
|
|
18
|
+
it "validates the url" do
|
19
|
+
-> {
|
20
|
+
ScheduleScraper.fetch(:pointstreak, "abc")
|
21
|
+
}.must_raise ScheduleScraper::InvalidURL
|
22
|
+
end
|
23
|
+
|
18
24
|
it "returns a schedule" do
|
19
25
|
VCR.use_cassette('summit_summer_2012') do
|
20
26
|
schedule = ScheduleScraper.fetch(:pointstreak, options)
|
data/spec/spec_helper.rb
CHANGED
@@ -5,7 +5,8 @@ require 'vcr'
|
|
5
5
|
|
6
6
|
require File.expand_path(File.join(File.dirname(__FILE__), '../lib/schedule-scraper'))
|
7
7
|
|
8
|
-
POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
|
8
|
+
# POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
|
9
|
+
POINTSTREAK_OPTIONS = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
|
9
10
|
|
10
11
|
VCR.configure do |c|
|
11
12
|
c.cassette_library_dir = 'spec/vcr_cassettes'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: schedule-scraper
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-06-
|
12
|
+
date: 2012-06-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nibbler
|
16
|
-
requirement: &
|
16
|
+
requirement: &70168497621260 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,21 @@ dependencies:
|
|
21
21
|
version: 1.3.0
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70168497621260
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: nokogiri
|
27
|
+
requirement: &70168497620760 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.5.4
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70168497620760
|
25
36
|
- !ruby/object:Gem::Dependency
|
26
37
|
name: minitest
|
27
|
-
requirement: &
|
38
|
+
requirement: &70168497620300 !ruby/object:Gem::Requirement
|
28
39
|
none: false
|
29
40
|
requirements:
|
30
41
|
- - ~>
|
@@ -32,10 +43,10 @@ dependencies:
|
|
32
43
|
version: 3.1.0
|
33
44
|
type: :development
|
34
45
|
prerelease: false
|
35
|
-
version_requirements: *
|
46
|
+
version_requirements: *70168497620300
|
36
47
|
- !ruby/object:Gem::Dependency
|
37
48
|
name: rake
|
38
|
-
requirement: &
|
49
|
+
requirement: &70168497619820 !ruby/object:Gem::Requirement
|
39
50
|
none: false
|
40
51
|
requirements:
|
41
52
|
- - ~>
|
@@ -43,10 +54,10 @@ dependencies:
|
|
43
54
|
version: 0.9.2
|
44
55
|
type: :development
|
45
56
|
prerelease: false
|
46
|
-
version_requirements: *
|
57
|
+
version_requirements: *70168497619820
|
47
58
|
- !ruby/object:Gem::Dependency
|
48
59
|
name: turn
|
49
|
-
requirement: &
|
60
|
+
requirement: &70168497619320 !ruby/object:Gem::Requirement
|
50
61
|
none: false
|
51
62
|
requirements:
|
52
63
|
- - ~>
|
@@ -54,10 +65,10 @@ dependencies:
|
|
54
65
|
version: 0.9.5
|
55
66
|
type: :development
|
56
67
|
prerelease: false
|
57
|
-
version_requirements: *
|
68
|
+
version_requirements: *70168497619320
|
58
69
|
- !ruby/object:Gem::Dependency
|
59
70
|
name: vcr
|
60
|
-
requirement: &
|
71
|
+
requirement: &70168497635140 !ruby/object:Gem::Requirement
|
61
72
|
none: false
|
62
73
|
requirements:
|
63
74
|
- - ~>
|
@@ -65,10 +76,10 @@ dependencies:
|
|
65
76
|
version: 2.2.0
|
66
77
|
type: :development
|
67
78
|
prerelease: false
|
68
|
-
version_requirements: *
|
79
|
+
version_requirements: *70168497635140
|
69
80
|
- !ruby/object:Gem::Dependency
|
70
81
|
name: fakeweb
|
71
|
-
requirement: &
|
82
|
+
requirement: &70168497634600 !ruby/object:Gem::Requirement
|
72
83
|
none: false
|
73
84
|
requirements:
|
74
85
|
- - ~>
|
@@ -76,7 +87,7 @@ dependencies:
|
|
76
87
|
version: 1.3.0
|
77
88
|
type: :development
|
78
89
|
prerelease: false
|
79
|
-
version_requirements: *
|
90
|
+
version_requirements: *70168497634600
|
80
91
|
description: Scrapes online schedules and provides portable versions
|
81
92
|
email:
|
82
93
|
- john@threedogconsulting.com
|