schedule-scraper 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -8,7 +8,8 @@ Supported schedule sites:
8
8
 
9
9
  Supported output formats:
10
10
 
11
- * CSV
11
+ * CSV (plain dump)
12
+ * Google Calendar formatted CSV
12
13
 
13
14
  ## Installation
14
15
 
@@ -26,14 +27,28 @@ Or install it yourself as:
26
27
 
27
28
  ## Usage
28
29
 
30
+ ### Pointstreak Example
31
+
32
+ Locate the printable version of the schedule:
33
+
34
+ 1. Visit the league's home page
35
+ 2. Click on the team in question
36
+ 3. Click the SCHEDULE link under TEAM MENU
37
+ 4. Click PRINT THIS PAGE
38
+
29
39
  Request a schedule:
30
40
 
31
- schedule = ScheduleScrape.fetch(:point_streak, :season => 123, :team => 456)
41
+ url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
42
+ schedule = ScheduleScrape.fetch(:point_streak, url)
32
43
 
33
44
  Export the schedule to CSV:
34
45
 
35
46
  schedule.to_csv
36
47
 
48
+ or
49
+
50
+ schedule.to_gcal
51
+
37
52
  ## TODO
38
53
 
39
54
  1. Add more export options: iCal, Google Calendar (csv)
@@ -15,6 +15,20 @@ module ScheduleScraper
15
15
  self.send(field)
16
16
  end
17
17
  end
18
+
19
+ def to_gcal
20
+ [
21
+ title,
22
+ start_date,
23
+ start_time,
24
+ end_date,
25
+ "",
26
+ all_day?,
27
+ description,
28
+ "",
29
+ private?
30
+ ]
31
+ end
18
32
  end
19
33
  end
20
34
 
@@ -10,6 +10,38 @@ module ScheduleScraper
10
10
  element 'td:nth(3)' => :date, :with => cleaner
11
11
  element 'td:nth(4)' => :time, :with => cleaner
12
12
  element 'td:nth(5)' => :rink, :with => cleaner
13
+
14
+ def title
15
+ "#{home_team} vs. #{away_team}"
16
+ end
17
+
18
+ def start_date
19
+ Date.parse(date).strftime("%m/%d/%y")
20
+ end
21
+
22
+ def end_date
23
+ start_date
24
+ end
25
+
26
+ def start_time
27
+ time
28
+ end
29
+
30
+ # def end_time
31
+ # will default to one hour?
32
+ # end
33
+
34
+ def all_day?
35
+ false
36
+ end
37
+
38
+ def description
39
+ title
40
+ end
41
+
42
+ def private?
43
+ true
44
+ end
13
45
  end
14
46
  end
15
47
  end
@@ -3,14 +3,12 @@ module ScheduleScraper
3
3
  class Schedule < Nibbler
4
4
  include ScheduleScraper::Schedule
5
5
 
6
- POINT_STREAK_URL = "http://www.pointstreak.com/players/print/players-team-schedule.html"
7
-
8
6
  element 'table table:last' => :list do
9
7
  elements 'tr:not(.fields)' => :event_list, :with => Event
10
8
  end
11
9
 
12
- def self.fetch(options)
13
- parse html(options[:season], options[:team])
10
+ def self.fetch(url)
11
+ parse open(url)
14
12
  end
15
13
 
16
14
  def events
@@ -19,14 +17,6 @@ module ScheduleScraper
19
17
 
20
18
  private
21
19
 
22
- def self.html(season, team)
23
- open(source_url(season, team))
24
- end
25
-
26
- def self.source_url(season, team)
27
- "#{POINT_STREAK_URL}?teamid=#{team}&seasonid=#{season}"
28
- end
29
-
30
20
  def event_class
31
21
  ScheduleScraper::Pointstreak::Event
32
22
  end
@@ -15,5 +15,26 @@ module ScheduleScraper
15
15
  end
16
16
  end
17
17
  end
18
+
19
+ def to_gcal
20
+ headers = [
21
+ "Subject",
22
+ "Start Date",
23
+ "Start Time",
24
+ "End Date",
25
+ "End Time",
26
+ "All Day Event",
27
+ "Description",
28
+ "Location",
29
+ "Private"
30
+ ]
31
+
32
+ CSV.generate do |csv|
33
+ csv << headers
34
+ events.each do |event|
35
+ csv << event.to_gcal
36
+ end
37
+ end
38
+ end
18
39
  end
19
40
  end
@@ -1,3 +1,3 @@
1
1
  module ScheduleScraper
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,6 +1,8 @@
1
1
  require 'nibbler'
2
2
  require 'open-uri'
3
3
  require 'csv'
4
+ require 'uri'
5
+ require 'nokogiri'
4
6
  require "schedule-scraper/version"
5
7
  require "schedule-scraper/event"
6
8
  require "schedule-scraper/schedule"
@@ -8,10 +10,11 @@ require "schedule-scraper/pointstreak/event"
8
10
  require "schedule-scraper/pointstreak/schedule"
9
11
 
10
12
  module ScheduleScraper
11
- def self.fetch(type, options = {})
13
+ def self.fetch(type, url)
12
14
  raise UnsupportedSchedule unless supported_schedules.include?(type.to_sym)
15
+ raise InvalidURL unless valid_url?(url)
13
16
 
14
- type_class(type).fetch(options)
17
+ type_class(type).fetch(url)
15
18
  end
16
19
 
17
20
  def self.type_class(type)
@@ -26,5 +29,13 @@ module ScheduleScraper
26
29
  ]
27
30
  end
28
31
 
32
+ def self.valid_url?(url)
33
+ uri = URI.parse(url)
34
+ uri.kind_of?(URI::HTTP)
35
+ rescue URI::InvalidURIError
36
+ false
37
+ end
38
+
29
39
  class UnsupportedSchedule < StandardError; end
40
+ class InvalidURL < StandardError; end
30
41
  end
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.version = ScheduleScraper::VERSION
17
17
 
18
18
  gem.add_dependency 'nibbler', '~> 1.3.0'
19
+ gem.add_dependency 'nokogiri', '~> 1.5.4'
19
20
 
20
21
  gem.add_development_dependency 'minitest', '~> 3.1.0'
21
22
  gem.add_development_dependency 'rake', '~> 0.9.2'
@@ -3,6 +3,15 @@ require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
3
3
  describe ScheduleScraper::Pointstreak::Event do
4
4
  let(:options) { POINTSTREAK_OPTIONS }
5
5
  let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
6
+ let(:expected_values) do
7
+ {
8
+ :home_team => "BLADES 6",
9
+ :away_team => "SUMMIT 8",
10
+ :date => "Sun, Jun 03",
11
+ :time => "7:45 pm",
12
+ :rink => "final"
13
+ }
14
+ end
6
15
 
7
16
  before do
8
17
  VCR.use_cassette('summit_summer_2012') do
@@ -20,11 +29,60 @@ describe ScheduleScraper::Pointstreak::Event do
20
29
 
21
30
  it "uses elements to define fields for csv" do
22
31
  klass = ScheduleScraper::Pointstreak::Event
23
- klass.send(:export_fields).must_equal fields
32
+ klass.send(:export_fields).must_equal expected_values.keys
24
33
  end
25
34
 
26
35
  it "returns a list of fields when you ask for csv" do
27
- expected = ["BLADES 6", "SUMMIT 8", "Sun, Jun 03", "7:45 pm", "final"]
28
- subject.to_csv.must_equal expected
36
+ subject.to_csv.must_equal expected_values.values
37
+ end
38
+
39
+ describe "output helper methods" do
40
+ it "defines a title" do
41
+ expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
42
+ subject.title.must_equal expected
43
+ end
44
+
45
+ it "defines a start date" do
46
+ expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
47
+ subject.start_date.must_equal expected
48
+ end
49
+
50
+ it "defines an end date" do
51
+ subject.end_date.must_equal subject.start_date
52
+ end
53
+
54
+ it "defines a start time" do
55
+ subject.start_time.must_equal expected_values[:time]
56
+ end
57
+
58
+ it "defines a description" do
59
+ subject.description.must_equal subject.title
60
+ end
61
+
62
+ it "defines all day event" do
63
+ subject.all_day?.must_equal false
64
+ end
65
+
66
+ it "defines all private" do
67
+ subject.private?.must_equal true
68
+ end
69
+ end
70
+
71
+ describe "#to_gcal" do
72
+ it "provides an array ready to export to csv" do
73
+ expected = [
74
+ subject.title,
75
+ subject.start_date,
76
+ subject.start_time,
77
+ subject.end_date,
78
+ "",
79
+ false,
80
+ subject.description,
81
+ "",
82
+ true
83
+ ]
84
+
85
+ subject.to_gcal.must_equal expected
86
+ end
29
87
  end
30
88
  end
@@ -4,23 +4,6 @@ describe ScheduleScraper::Pointstreak::Schedule do
4
4
  subject() { ScheduleScraper::Pointstreak::Schedule }
5
5
  let(:options) { POINTSTREAK_OPTIONS }
6
6
 
7
- it "knows the root pointstreak url" do
8
- subject::POINT_STREAK_URL.must_match /pointstreak/
9
- end
10
-
11
- it "builds a valid url" do
12
- url = subject.send(:source_url, "123", "456")
13
- expected = "#{subject::POINT_STREAK_URL}?teamid=456&seasonid=123"
14
-
15
- url.must_equal expected
16
- end
17
-
18
- it "fetches html from pointstreak" do
19
- VCR.use_cassette('summit_summer_2012') do
20
- subject.html(options[:season], options[:team])
21
- end # wont_raise
22
- end
23
-
24
7
  describe "schedule instance" do
25
8
  subject() do
26
9
  VCR.use_cassette('summit_summer_2012') do
@@ -44,5 +27,10 @@ describe ScheduleScraper::Pointstreak::Schedule do
44
27
  it "generates a csv file" do
45
28
  subject.to_csv.must_be_instance_of String
46
29
  end
30
+
31
+ it "generates a google calendar formatted csv" do
32
+ p subject.to_gcal
33
+ subject.to_gcal.must_be_instance_of String
34
+ end
47
35
  end
48
36
  end
@@ -1,11 +1,9 @@
1
1
  # require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
2
2
 
3
- # describe ScheduleScraper::Schedule do
4
- # subject() { ScheduleScraper::Schedule.new({}) }
3
+ # class ScraperTest
4
+ # include ScheduleScraper::Schedule
5
+ # end
5
6
 
6
- # describe "#to_csv" do
7
- # it "returns a schedule in CSV format" do
8
- # subject.to_csv.must_be_instance_of String
9
- # end
10
- # end
7
+ # describe ScheduleScraper::Schedule do
8
+ # subject() { ScraperTest.new(:xyz, :url => "http://www.xyz.com") }
11
9
  # end
@@ -11,10 +11,16 @@ describe ScheduleScraper do
11
11
  end
12
12
 
13
13
  -> {
14
- ScheduleScraper.fetch(:xyz)
14
+ ScheduleScraper.fetch(:xyz, options)
15
15
  }.must_raise ScheduleScraper::UnsupportedSchedule
16
16
  end
17
17
 
18
+ it "validates the url" do
19
+ -> {
20
+ ScheduleScraper.fetch(:pointstreak, "abc")
21
+ }.must_raise ScheduleScraper::InvalidURL
22
+ end
23
+
18
24
  it "returns a schedule" do
19
25
  VCR.use_cassette('summit_summer_2012') do
20
26
  schedule = ScheduleScraper.fetch(:pointstreak, options)
data/spec/spec_helper.rb CHANGED
@@ -5,7 +5,8 @@ require 'vcr'
5
5
 
6
6
  require File.expand_path(File.join(File.dirname(__FILE__), '../lib/schedule-scraper'))
7
7
 
8
- POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
8
+ # POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
9
+ POINTSTREAK_OPTIONS = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
9
10
 
10
11
  VCR.configure do |c|
11
12
  c.cassette_library_dir = 'spec/vcr_cassettes'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: schedule-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-18 00:00:00.000000000 Z
12
+ date: 2012-06-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nibbler
16
- requirement: &70142865159900 !ruby/object:Gem::Requirement
16
+ requirement: &70168497621260 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: 1.3.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70142865159900
24
+ version_requirements: *70168497621260
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &70168497620760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.4
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70168497620760
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: minitest
27
- requirement: &70142865156300 !ruby/object:Gem::Requirement
38
+ requirement: &70168497620300 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 3.1.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70142865156300
46
+ version_requirements: *70168497620300
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: rake
38
- requirement: &70142865155280 !ruby/object:Gem::Requirement
49
+ requirement: &70168497619820 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 0.9.2
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70142865155280
57
+ version_requirements: *70168497619820
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: turn
49
- requirement: &70142865153960 !ruby/object:Gem::Requirement
60
+ requirement: &70168497619320 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ~>
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: 0.9.5
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *70142865153960
68
+ version_requirements: *70168497619320
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: vcr
60
- requirement: &70142865171400 !ruby/object:Gem::Requirement
71
+ requirement: &70168497635140 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 2.2.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *70142865171400
79
+ version_requirements: *70168497635140
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: fakeweb
71
- requirement: &70142865170560 !ruby/object:Gem::Requirement
82
+ requirement: &70168497634600 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,7 +87,7 @@ dependencies:
76
87
  version: 1.3.0
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *70142865170560
90
+ version_requirements: *70168497634600
80
91
  description: Scrapes online schedules and provides portable versions
81
92
  email:
82
93
  - john@threedogconsulting.com