schedule-scraper 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -8,7 +8,8 @@ Supported schedule sites:
8
8
 
9
9
  Supported output formats:
10
10
 
11
- * CSV
11
+ * CSV (plain dump)
12
+ * Google Calendar formatted CSV
12
13
 
13
14
  ## Installation
14
15
 
@@ -26,14 +27,28 @@ Or install it yourself as:
26
27
 
27
28
  ## Usage
28
29
 
30
+ ### Pointstreak Example
31
+
32
+ Locate the printable version of the schedule:
33
+
34
+ 1. Visit the league's home page
35
+ 2. Click on the team in question
36
+ 3. Click the SCHEDULE link under TEAM MENU
37
+ 4. Click PRINT THIS PAGE
38
+
29
39
  Request a schedule:
30
40
 
31
- schedule = ScheduleScrape.fetch(:point_streak, :season => 123, :team => 456)
41
+ url = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
42
+ schedule = ScheduleScrape.fetch(:point_streak, url)
32
43
 
33
44
  Export the schedule to CSV:
34
45
 
35
46
  schedule.to_csv
36
47
 
48
+ or
49
+
50
+ schedule.to_gcal
51
+
37
52
  ## TODO
38
53
 
39
54
  1. Add more export options: iCal, Google Calendar (csv)
@@ -15,6 +15,20 @@ module ScheduleScraper
15
15
  self.send(field)
16
16
  end
17
17
  end
18
+
19
+ def to_gcal
20
+ [
21
+ title,
22
+ start_date,
23
+ start_time,
24
+ end_date,
25
+ "",
26
+ all_day?,
27
+ description,
28
+ "",
29
+ private?
30
+ ]
31
+ end
18
32
  end
19
33
  end
20
34
 
@@ -10,6 +10,38 @@ module ScheduleScraper
10
10
  element 'td:nth(3)' => :date, :with => cleaner
11
11
  element 'td:nth(4)' => :time, :with => cleaner
12
12
  element 'td:nth(5)' => :rink, :with => cleaner
13
+
14
+ def title
15
+ "#{home_team} vs. #{away_team}"
16
+ end
17
+
18
+ def start_date
19
+ Date.parse(date).strftime("%m/%d/%y")
20
+ end
21
+
22
+ def end_date
23
+ start_date
24
+ end
25
+
26
+ def start_time
27
+ time
28
+ end
29
+
30
+ # def end_time
31
+ # will default to one hour?
32
+ # end
33
+
34
+ def all_day?
35
+ false
36
+ end
37
+
38
+ def description
39
+ title
40
+ end
41
+
42
+ def private?
43
+ true
44
+ end
13
45
  end
14
46
  end
15
47
  end
@@ -3,14 +3,12 @@ module ScheduleScraper
3
3
  class Schedule < Nibbler
4
4
  include ScheduleScraper::Schedule
5
5
 
6
- POINT_STREAK_URL = "http://www.pointstreak.com/players/print/players-team-schedule.html"
7
-
8
6
  element 'table table:last' => :list do
9
7
  elements 'tr:not(.fields)' => :event_list, :with => Event
10
8
  end
11
9
 
12
- def self.fetch(options)
13
- parse html(options[:season], options[:team])
10
+ def self.fetch(url)
11
+ parse open(url)
14
12
  end
15
13
 
16
14
  def events
@@ -19,14 +17,6 @@ module ScheduleScraper
19
17
 
20
18
  private
21
19
 
22
- def self.html(season, team)
23
- open(source_url(season, team))
24
- end
25
-
26
- def self.source_url(season, team)
27
- "#{POINT_STREAK_URL}?teamid=#{team}&seasonid=#{season}"
28
- end
29
-
30
20
  def event_class
31
21
  ScheduleScraper::Pointstreak::Event
32
22
  end
@@ -15,5 +15,26 @@ module ScheduleScraper
15
15
  end
16
16
  end
17
17
  end
18
+
19
+ def to_gcal
20
+ headers = [
21
+ "Subject",
22
+ "Start Date",
23
+ "Start Time",
24
+ "End Date",
25
+ "End Time",
26
+ "All Day Event",
27
+ "Description",
28
+ "Location",
29
+ "Private"
30
+ ]
31
+
32
+ CSV.generate do |csv|
33
+ csv << headers
34
+ events.each do |event|
35
+ csv << event.to_gcal
36
+ end
37
+ end
38
+ end
18
39
  end
19
40
  end
@@ -1,3 +1,3 @@
1
1
  module ScheduleScraper
2
- VERSION = "0.1.0"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -1,6 +1,8 @@
1
1
  require 'nibbler'
2
2
  require 'open-uri'
3
3
  require 'csv'
4
+ require 'uri'
5
+ require 'nokogiri'
4
6
  require "schedule-scraper/version"
5
7
  require "schedule-scraper/event"
6
8
  require "schedule-scraper/schedule"
@@ -8,10 +10,11 @@ require "schedule-scraper/pointstreak/event"
8
10
  require "schedule-scraper/pointstreak/schedule"
9
11
 
10
12
  module ScheduleScraper
11
- def self.fetch(type, options = {})
13
+ def self.fetch(type, url)
12
14
  raise UnsupportedSchedule unless supported_schedules.include?(type.to_sym)
15
+ raise InvalidURL unless valid_url?(url)
13
16
 
14
- type_class(type).fetch(options)
17
+ type_class(type).fetch(url)
15
18
  end
16
19
 
17
20
  def self.type_class(type)
@@ -26,5 +29,13 @@ module ScheduleScraper
26
29
  ]
27
30
  end
28
31
 
32
+ def self.valid_url?(url)
33
+ uri = URI.parse(url)
34
+ uri.kind_of?(URI::HTTP)
35
+ rescue URI::InvalidURIError
36
+ false
37
+ end
38
+
29
39
  class UnsupportedSchedule < StandardError; end
40
+ class InvalidURL < StandardError; end
30
41
  end
@@ -16,6 +16,7 @@ Gem::Specification.new do |gem|
16
16
  gem.version = ScheduleScraper::VERSION
17
17
 
18
18
  gem.add_dependency 'nibbler', '~> 1.3.0'
19
+ gem.add_dependency 'nokogiri', '~> 1.5.4'
19
20
 
20
21
  gem.add_development_dependency 'minitest', '~> 3.1.0'
21
22
  gem.add_development_dependency 'rake', '~> 0.9.2'
@@ -3,6 +3,15 @@ require File.expand_path(File.join(File.dirname(__FILE__), '../../spec_helper'))
3
3
  describe ScheduleScraper::Pointstreak::Event do
4
4
  let(:options) { POINTSTREAK_OPTIONS }
5
5
  let(:fields) { [:home_team, :away_team, :date, :time, :rink] }
6
+ let(:expected_values) do
7
+ {
8
+ :home_team => "BLADES 6",
9
+ :away_team => "SUMMIT 8",
10
+ :date => "Sun, Jun 03",
11
+ :time => "7:45 pm",
12
+ :rink => "final"
13
+ }
14
+ end
6
15
 
7
16
  before do
8
17
  VCR.use_cassette('summit_summer_2012') do
@@ -20,11 +29,60 @@ describe ScheduleScraper::Pointstreak::Event do
20
29
 
21
30
  it "uses elements to define fields for csv" do
22
31
  klass = ScheduleScraper::Pointstreak::Event
23
- klass.send(:export_fields).must_equal fields
32
+ klass.send(:export_fields).must_equal expected_values.keys
24
33
  end
25
34
 
26
35
  it "returns a list of fields when you ask for csv" do
27
- expected = ["BLADES 6", "SUMMIT 8", "Sun, Jun 03", "7:45 pm", "final"]
28
- subject.to_csv.must_equal expected
36
+ subject.to_csv.must_equal expected_values.values
37
+ end
38
+
39
+ describe "output helper methods" do
40
+ it "defines a title" do
41
+ expected = "#{expected_values[:home_team]} vs. #{expected_values[:away_team]}"
42
+ subject.title.must_equal expected
43
+ end
44
+
45
+ it "defines a start date" do
46
+ expected = Date.parse(expected_values[:date]).strftime("%m/%d/%y")
47
+ subject.start_date.must_equal expected
48
+ end
49
+
50
+ it "defines an end date" do
51
+ subject.end_date.must_equal subject.start_date
52
+ end
53
+
54
+ it "defines a start time" do
55
+ subject.start_time.must_equal expected_values[:time]
56
+ end
57
+
58
+ it "defines a description" do
59
+ subject.description.must_equal subject.title
60
+ end
61
+
62
+ it "defines all day event" do
63
+ subject.all_day?.must_equal false
64
+ end
65
+
66
+ it "defines all private" do
67
+ subject.private?.must_equal true
68
+ end
69
+ end
70
+
71
+ describe "#to_gcal" do
72
+ it "provides an array ready to export to csv" do
73
+ expected = [
74
+ subject.title,
75
+ subject.start_date,
76
+ subject.start_time,
77
+ subject.end_date,
78
+ "",
79
+ false,
80
+ subject.description,
81
+ "",
82
+ true
83
+ ]
84
+
85
+ subject.to_gcal.must_equal expected
86
+ end
29
87
  end
30
88
  end
@@ -4,23 +4,6 @@ describe ScheduleScraper::Pointstreak::Schedule do
4
4
  subject() { ScheduleScraper::Pointstreak::Schedule }
5
5
  let(:options) { POINTSTREAK_OPTIONS }
6
6
 
7
- it "knows the root pointstreak url" do
8
- subject::POINT_STREAK_URL.must_match /pointstreak/
9
- end
10
-
11
- it "builds a valid url" do
12
- url = subject.send(:source_url, "123", "456")
13
- expected = "#{subject::POINT_STREAK_URL}?teamid=456&seasonid=123"
14
-
15
- url.must_equal expected
16
- end
17
-
18
- it "fetches html from pointstreak" do
19
- VCR.use_cassette('summit_summer_2012') do
20
- subject.html(options[:season], options[:team])
21
- end # wont_raise
22
- end
23
-
24
7
  describe "schedule instance" do
25
8
  subject() do
26
9
  VCR.use_cassette('summit_summer_2012') do
@@ -44,5 +27,10 @@ describe ScheduleScraper::Pointstreak::Schedule do
44
27
  it "generates a csv file" do
45
28
  subject.to_csv.must_be_instance_of String
46
29
  end
30
+
31
+ it "generates a google calendar formatted csv" do
32
+ p subject.to_gcal
33
+ subject.to_gcal.must_be_instance_of String
34
+ end
47
35
  end
48
36
  end
@@ -1,11 +1,9 @@
1
1
  # require File.expand_path(File.join(File.dirname(__FILE__), '../spec_helper'))
2
2
 
3
- # describe ScheduleScraper::Schedule do
4
- # subject() { ScheduleScraper::Schedule.new({}) }
3
+ # class ScraperTest
4
+ # include ScheduleScraper::Schedule
5
+ # end
5
6
 
6
- # describe "#to_csv" do
7
- # it "returns a schedule in CSV format" do
8
- # subject.to_csv.must_be_instance_of String
9
- # end
10
- # end
7
+ # describe ScheduleScraper::Schedule do
8
+ # subject() { ScraperTest.new(:xyz, :url => "http://www.xyz.com") }
11
9
  # end
@@ -11,10 +11,16 @@ describe ScheduleScraper do
11
11
  end
12
12
 
13
13
  -> {
14
- ScheduleScraper.fetch(:xyz)
14
+ ScheduleScraper.fetch(:xyz, options)
15
15
  }.must_raise ScheduleScraper::UnsupportedSchedule
16
16
  end
17
17
 
18
+ it "validates the url" do
19
+ -> {
20
+ ScheduleScraper.fetch(:pointstreak, "abc")
21
+ }.must_raise ScheduleScraper::InvalidURL
22
+ end
23
+
18
24
  it "returns a schedule" do
19
25
  VCR.use_cassette('summit_summer_2012') do
20
26
  schedule = ScheduleScraper.fetch(:pointstreak, options)
data/spec/spec_helper.rb CHANGED
@@ -5,7 +5,8 @@ require 'vcr'
5
5
 
6
6
  require File.expand_path(File.join(File.dirname(__FILE__), '../lib/schedule-scraper'))
7
7
 
8
- POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
8
+ # POINTSTREAK_OPTIONS = { :season => "9162", :team => "385368" }
9
+ POINTSTREAK_OPTIONS = "http://www.pointstreak.com/players/print/players-team-schedule.html?teamid=385368&seasonid=9162"
9
10
 
10
11
  VCR.configure do |c|
11
12
  c.cassette_library_dir = 'spec/vcr_cassettes'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: schedule-scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-06-18 00:00:00.000000000 Z
12
+ date: 2012-06-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nibbler
16
- requirement: &70142865159900 !ruby/object:Gem::Requirement
16
+ requirement: &70168497621260 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,21 @@ dependencies:
21
21
  version: 1.3.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70142865159900
24
+ version_requirements: *70168497621260
25
+ - !ruby/object:Gem::Dependency
26
+ name: nokogiri
27
+ requirement: &70168497620760 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ~>
31
+ - !ruby/object:Gem::Version
32
+ version: 1.5.4
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70168497620760
25
36
  - !ruby/object:Gem::Dependency
26
37
  name: minitest
27
- requirement: &70142865156300 !ruby/object:Gem::Requirement
38
+ requirement: &70168497620300 !ruby/object:Gem::Requirement
28
39
  none: false
29
40
  requirements:
30
41
  - - ~>
@@ -32,10 +43,10 @@ dependencies:
32
43
  version: 3.1.0
33
44
  type: :development
34
45
  prerelease: false
35
- version_requirements: *70142865156300
46
+ version_requirements: *70168497620300
36
47
  - !ruby/object:Gem::Dependency
37
48
  name: rake
38
- requirement: &70142865155280 !ruby/object:Gem::Requirement
49
+ requirement: &70168497619820 !ruby/object:Gem::Requirement
39
50
  none: false
40
51
  requirements:
41
52
  - - ~>
@@ -43,10 +54,10 @@ dependencies:
43
54
  version: 0.9.2
44
55
  type: :development
45
56
  prerelease: false
46
- version_requirements: *70142865155280
57
+ version_requirements: *70168497619820
47
58
  - !ruby/object:Gem::Dependency
48
59
  name: turn
49
- requirement: &70142865153960 !ruby/object:Gem::Requirement
60
+ requirement: &70168497619320 !ruby/object:Gem::Requirement
50
61
  none: false
51
62
  requirements:
52
63
  - - ~>
@@ -54,10 +65,10 @@ dependencies:
54
65
  version: 0.9.5
55
66
  type: :development
56
67
  prerelease: false
57
- version_requirements: *70142865153960
68
+ version_requirements: *70168497619320
58
69
  - !ruby/object:Gem::Dependency
59
70
  name: vcr
60
- requirement: &70142865171400 !ruby/object:Gem::Requirement
71
+ requirement: &70168497635140 !ruby/object:Gem::Requirement
61
72
  none: false
62
73
  requirements:
63
74
  - - ~>
@@ -65,10 +76,10 @@ dependencies:
65
76
  version: 2.2.0
66
77
  type: :development
67
78
  prerelease: false
68
- version_requirements: *70142865171400
79
+ version_requirements: *70168497635140
69
80
  - !ruby/object:Gem::Dependency
70
81
  name: fakeweb
71
- requirement: &70142865170560 !ruby/object:Gem::Requirement
82
+ requirement: &70168497634600 !ruby/object:Gem::Requirement
72
83
  none: false
73
84
  requirements:
74
85
  - - ~>
@@ -76,7 +87,7 @@ dependencies:
76
87
  version: 1.3.0
77
88
  type: :development
78
89
  prerelease: false
79
- version_requirements: *70142865170560
90
+ version_requirements: *70168497634600
80
91
  description: Scrapes online schedules and provides portable versions
81
92
  email:
82
93
  - john@threedogconsulting.com