amtrak 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +6 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +3 -0
  6. data/README.markdown +3 -0
  7. data/Rakefile +23 -0
  8. data/amtrak.gemspec +29 -0
  9. data/lib/amtrak.rb +14 -0
  10. data/lib/amtrak/train_fetcher.rb +39 -0
  11. data/lib/amtrak/train_fetcher/main_page.rb +70 -0
  12. data/lib/amtrak/train_fetcher/train_page.rb +41 -0
  13. data/lib/amtrak/train_parser.rb +90 -0
  14. data/lib/amtrak/version.rb +6 -0
  15. data/spec/amtrak/train_fetcher/main_page_spec.rb +75 -0
  16. data/spec/amtrak/train_fetcher/train_page_spec.rb +37 -0
  17. data/spec/amtrak/train_fetcher_spec.rb +59 -0
  18. data/spec/amtrak/train_parser_spec.rb +230 -0
  19. data/spec/amtrak_spec.rb +243 -0
  20. data/spec/fixtures/html/pvd_to_bby.html +2401 -0
  21. data/spec/fixtures/vcr/Amtrak/_get/returns_a_list_of_train_times.yml +1469 -0
  22. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/does_the_same_as_get.yml +1467 -0
  23. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_invalid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +843 -0
  24. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_train_stations/includes_various_classes.yml +1467 -0
  25. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_an_invalid_date_and_valid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +838 -0
  26. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_session_id/pulls_the_session_id_from_the_cookies.yml +1082 -0
  27. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_1.yml +1076 -0
  28. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_2.yml +1074 -0
  29. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_only_one_page_exists_on_the_website/returns_1.yml +1082 -0
  30. data/spec/fixtures/vcr/Amtrak_TrainFetcher_TrainPage/_get/when_it_works/includes_various_classes.yml +247 -0
  31. data/spec/spec_helper.rb +18 -0
  32. data/spec/support/vcr.rb +10 -0
  33. metadata +206 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 893ffef1c08c1763b3a34c0f70d9dd303f89385f
4
+ data.tar.gz: c2e940a65b8bf344359b2ca25a15b0c853776c3a
5
+ SHA512:
6
+ metadata.gz: f98144098bbe5944ababe3817acc7713a838279dc63b1a9cacf5af17a34584faaf879c8ebce826a462867ddab44bce7fb5db5e1fca9ad4acce5ad05a60edf442
7
+ data.tar.gz: 42ce40b14aeb3341e0d46e20512b2275fb80e02d51cdf03ff475314974084db1190244c340f79cf6c175add675cc2d261e0d3a28bf1cd6249008c109cc10dd84
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ coverage
@@ -0,0 +1,6 @@
1
+ Lint/AssignmentInCondition:
2
+ Enabled: false
3
+ Metrics/MethodLength:
4
+ Max: 12
5
+ Metrics/LineLength:
6
+ Max: 100
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0
4
+ - 2.1.1
5
+ script: bundle exec rake
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,3 @@
1
+ # Amtrak
2
+
3
+ Pulls down train times from the Amtrak website.
@@ -0,0 +1,23 @@
1
+ require 'rake'
2
+
# Tasks run by a bare `rake`. An entry is appended only after the gem that
# provides the task has been required successfully.
default_tasks = []

# Lint task over the library and spec sources.
begin
  require 'rubocop/rake_task'

  RuboCop::RakeTask.new(:rubocop) do |rubocop|
    rubocop.patterns = %w(lib/**/*.rb spec/**/*.rb)
  end
  default_tasks.push(:rubocop)
rescue LoadError
  # rubocop could not be loaded: leave the lint task out of the defaults.
end

# Spec task.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec)
  default_tasks.push(:spec)
rescue LoadError
  # rspec could not be loaded: leave the spec task out of the defaults.
end

task default: default_tasks
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'amtrak/version'
5
+
# Gem specification for the amtrak gem. The version comes from
# Amtrak::Version::VERSION and the file list from `git ls-files`.
Gem::Specification.new do |spec|
  spec.name          = 'amtrak'
  spec.version       = Amtrak::Version::VERSION
  spec.authors       = ['Todd Lunter']
  spec.email         = ['tlunter@gmail.com']
  spec.description   = 'Scrape train times from Amtrak.com'
  spec.summary       = 'Scrape train times from Amtrak.com'
  spec.homepage      = 'https://github.com/tlunter/amtrak_gem'
  spec.license       = 'MIT'
  spec.files         = `git ls-files`.split($/)
  # grep needs a Regexp (%r{}) to select paths under bin/; a plain %{} String
  # only matches a file with exactly that name. The block must also declare
  # the parameter it passes to File.basename.
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'excon', '~> 0.41.0'
  spec.add_dependency 'nokogiri', '~> 1.6.4'

  # Development and test tooling
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rspec', '~> 3.0.0'
  spec.add_development_dependency 'rubocop', '~> 0.25.0'
  spec.add_development_dependency 'simplecov', '~> 0.8.2'
  spec.add_development_dependency 'vcr', '~> 2.9.2'
end
@@ -0,0 +1,14 @@
# Top-level namespace of the Amtrak gem
module Amtrak
  # Root of the gem's exception hierarchy
  class Error < StandardError
  end

  # Fetches the result pages for a from/to pair and parses each one.
  #
  # @param from origin, handed to Amtrak::TrainFetcher.get unchanged
  # @param to destination, handed to Amtrak::TrainFetcher.get unchanged
  # @param date [Date, nil] forwarded as the `date:` keyword
  # @return [Array] the flattened results of Amtrak::TrainParser.parse for
  #   every page the fetcher returned
  def self.get(from, to, date: nil)
    pages = Amtrak::TrainFetcher.get(from, to, date: date)
    parsed = pages.map { |html| Amtrak::TrainParser.parse(html) }
    parsed.flatten
  end
end
11
+
12
+ require 'amtrak/train_parser'
13
+ require 'amtrak/train_fetcher'
14
+ require 'amtrak/version'
@@ -0,0 +1,39 @@
module Amtrak
  # Service for getting train time HTML pages from the Amtrak website
  class TrainFetcher
    # Raised when a request to the Amtrak website fails
    class Error < Amtrak::Error; end

    # Builds a fetcher and returns the HTML of every result page.
    #
    # The parameters are spelled out rather than forwarded through a bare
    # `*args`: since Ruby 3.0 a keyword captured by `*args` is re-splatted as
    # a positional Hash, so `date:` would never reach #initialize as a keyword
    # and the call would raise ArgumentError.
    #
    # @param from origin passed on to MainPage
    # @param to destination passed on to MainPage
    # @param date [Date, nil] travel date; MainPage falls back to today on nil
    # @return [Array] one response body per result page
    def self.get(from, to, date: nil)
      new(from, to, date: date).get
    end

    attr_reader :from, :to, :date

    def initialize(from, to, date: nil)
      @from = from
      @to = to
      @date = date
    end

    # Requests pages 1..total_pages using the session opened by the first page.
    #
    # @return [Array] the value of TrainPage.get for each page number
    def get
      (1..total_pages).map do |page|
        Amtrak::TrainFetcher::TrainPage.get(session_id, page)
      end
    end

    # Memoized MainPage, so session_id and total_pages share one search request.
    def first_page
      @first_page ||= Amtrak::TrainFetcher::MainPage.new(from, to, date: date)
    end

    # Session id reported by the first page.
    def session_id
      @session_id ||= first_page.session_id
    end

    # Number of result pages reported by the first page.
    def total_pages
      @total_pages ||= first_page.total_pages
    end
  end
end
37
+
38
+ require 'amtrak/train_fetcher/main_page'
39
+ require 'amtrak/train_fetcher/train_page'
@@ -0,0 +1,70 @@
1
+ require 'excon'
2
+ require 'date'
3
+
module Amtrak
  class TrainFetcher
    # Service for getting train time results/cookies from the Amtrak website
    class MainPage
      attr_reader :from, :to

      # @param from origin; sent as `wdf_origin` after #to_s
      # @param to destination; sent as `wdf_destination` after #to_s
      # @param date [Date, nil] travel date; #date falls back to Date.today
      def initialize(from, to, date: nil)
        @from = from
        @to = to
        @date = date
      end

      # Posts the search form once and memoizes the response.
      #
      # NOTE(review): this request uses http:// while TrainPage uses https://
      # for the same host. The URL is left as-is; the recorded spec fixtures
      # presumably depend on it - confirm before changing.
      #
      # @raise [Amtrak::TrainFetcher::Error] on an Excon client or server error
      def page
        @page ||= Excon.post(
          'http://tickets.amtrak.com/itd/amtrak',
          headers: headers,
          body: URI.encode_www_form(body),
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      # Request headers for the form post.
      def headers
        { 'Content-Type' => 'application/x-www-form-urlencoded' }
      end

      # Form fields submitted with the search request.
      # rubocop:disable all
      def body
        {
          "_handler=amtrak.presentation.handler.request.rail.AmtrakRailTrainStatusSearchRequestHandler/_xpath=/sessionWorkflow/productWorkflow[@product='Rail']" => '',
          "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate.date" => departure_date,
          'requestor' => 'amtrak.presentation.handler.page.rail.AmtrakRailGetTrainStatusPageHandler',
          'xwdf_trainNumber' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/segmentRequirements[1]/serviceCode",
          'wdf_trainNumber' => 'optional',
          'xwdf_SortBy' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate/@radioSelect",
          'wdf_SortBy' => 'arrivalTime',
          'xwdf_origin' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/departLocation/search",
          'wdf_origin' => from.to_s,
          'xwdf_destination' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/arriveLocation/search",
          'wdf_destination' => to.to_s,
        }
      end
      # rubocop:enable all

      # Travel date formatted for the form, e.g. "Wed, Nov 12, 2014".
      def departure_date
        date.strftime('%a, %b %-d, %Y')
      end

      # The date given to the constructor, or today's date when none was given.
      def date
        @date ||= Date.today
      end

      # Extracts the JSESSIONID value from the response's Set-Cookie header.
      #
      # @raise [Amtrak::TrainFetcher::Error] when the header is missing or
      #   carries no JSESSIONID (previously a NoMethodError on nil)
      def session_id
        cookie = page.headers['Set-Cookie'].to_s
        found = cookie.match(/JSESSIONID=([^;]*)/)
        fail Amtrak::TrainFetcher::Error, 'No JSESSIONID cookie in response' unless found
        found[1]
      end

      # Listing count divided by 10 and rounded up.
      # NOTE(review): presumably the site shows 10 listings per page - confirm.
      def total_pages
        (Float(extract_listing_length) / 10).ceil
      end

      # Reads the `availabilityLength` JavaScript variable from the page body.
      #
      # @return [String] the digits captured from the page
      # @raise [Amtrak::TrainFetcher::Error] when the variable is not present
      #   (previously a NoMethodError on nil)
      def extract_listing_length
        found = page.body.to_s.match(/var availabilityLength = '(\d+)';/)
        fail Amtrak::TrainFetcher::Error, 'No availabilityLength in response' unless found
        found[1]
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'excon'
2
+
module Amtrak
  class TrainFetcher
    # Fetches the HTML of one train status results page for an existing session
    class TrainPage
      attr_reader :session_id, :page

      # Shortcut for building an instance and returning its #get result.
      def self.get(*args)
        instance = new(*args)
        instance.get
      end

      # @param session_id value sent back as the JSESSIONID cookie
      # @param page value sent as the `_trainstatuspage` query parameter
      def initialize(session_id, page)
        @page = page
        @session_id = session_id
      end

      # @return the body of the (memoized) response
      def get
        response = request
        response.body
      end

      # Performs the GET once and caches the response object.
      #
      # @raise [Amtrak::TrainFetcher::Error] when Excon raises a client or
      #   server error
      def request
        return @request if @request

        @request = Excon.get(
          'https://tickets.amtrak.com/itd/amtrak/TrainStatusRequest',
          headers: headers,
          query: query
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => error
        raise Amtrak::TrainFetcher::Error, "#{error.class} #{error.message}"
      end

      # Cookie header carrying the session id.
      def headers
        cookie = "JSESSIONID=#{session_id}"
        { 'Cookie' => cookie }
      end

      # Query string parameters selecting the page.
      def query
        { '_trainstatuspage' => page }
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'nokogiri'
2
+
module Amtrak
  # Takes an HTML document as a string and parses out the train schedule
  class TrainParser
    # Error raised when the parser runs into an issue
    class Error < Amtrak::Error; end

    # Shortcut for `new(text).parse`.
    def self.parse(text)
      new(text).parse
    end

    attr_reader :document

    # @param document [String] HTML handed to Nokogiri::HTML.parse
    def initialize(document)
      @document = Nokogiri::HTML.parse(document)
    end

    # Builds one hash per train from consecutive pairs of result rows: the
    # first row of each pair is read as the departure, the second as the
    # arrival.
    #
    # The memoized row list is walked with each_slice instead of being
    # consumed with shift, so calling #parse again on the same instance
    # returns the same trains rather than an empty array.
    #
    # @return [Array<Hash>] hashes with :number, :departure and :arrival
    # @raise [Amtrak::TrainParser::Error] when Nokogiri raises a SyntaxError
    # @raise [RuntimeError] from #train_nodes / #find! (not wrapped here)
    def parse
      train_nodes.each_slice(2).map do |departure, arrival|
        {
          number: parse_train_number(departure),
          departure: parse_train(departure),
          arrival: parse_train(arrival)
        }
      end
    rescue Nokogiri::SyntaxError => ex
      raise Amtrak::TrainParser::Error, "#{ex.class} #{ex.message}"
    end

    # All <tr> elements whose class attribute contains 'status_result'.
    #
    # @return [Array] memoized list of row nodes
    # @raise [RuntimeError] 'No trains found' when the document has no such row
    def train_nodes
      @train_nodes ||= document.search(
        "//tr[contains(@class, 'status_result')]"
      ).tap { |results| fail 'No trains found' unless results.count > 0 }.to_a
    end

    # Train number from the row's service header cell; a missing or
    # non-numeric text yields 0 via String#to_i.
    def parse_train_number(node)
      find!(
        node, ".//th[@class='service']/div[@class='route_num']/text()"
      ).to_s.to_i
    end

    # Scheduled and actual/estimated date and time strings for one row.
    #
    # @return [Hash] :scheduled_date, :estimated_date, :scheduled_time and
    #   :estimated_time, each a String (empty when the cell is absent)
    def parse_train(node)
      {
        scheduled_date: cell_text(node, 'scheduled', 'date'),
        estimated_date: cell_text(node, 'act_est', 'date'),
        scheduled_time: remove_parentheses(cell_text(node, 'scheduled', 'time')),
        estimated_time: remove_parentheses(cell_text(node, 'act_est', 'time'))
      }
    end

    # Text of the <div class=part> inside the row's <td class=cell>.
    def cell_text(node, cell, part)
      find!(node, ".//td[@class='#{cell}']/div[@class='#{part}']/text()").to_s
    end

    # Searches node with xpath, allowing zero or one match.
    #
    # @raise [RuntimeError] "<n> results" when more than one node matches
    def find!(node, xpath)
      node.search(xpath).tap { |rs| fail "#{rs.count} results" if rs.count > 1 }
    end

    # Combines a date and a time string into a DateTime.
    #
    # @return [DateTime, nil] nil when either argument is nil
    def make_datetime(date, time)
      return if date.nil? || time.nil?

      DateTime.parse("#{date} at #{time}")
    end

    # Returns the text inside the first pair of parentheses, or the string
    # unchanged when it contains none.
    def remove_parentheses(time_string)
      matches = /\(([^)]+)\)/.match(time_string)
      matches ? matches[1] : time_string
    end
  end
end
@@ -0,0 +1,6 @@
module Amtrak
  # This module holds the Amtrak version
  module Version
    # Current gem version. Frozen so the shared constant cannot be mutated
    # in place by code that reads it.
    VERSION = '0.0.1'.freeze
  end
end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+
# Specs for Amtrak::TrainFetcher::MainPage. Examples tagged :vcr keep their
# original descriptions; only the untagged '#page' examples were renamed so
# that each has a unique, accurate description.
describe Amtrak::TrainFetcher::MainPage do
  describe '#departure_date' do
    subject { described_class.new('', '', date: date) }
    let(:date) { Date.parse('2014-11-12') }

    it 'prints out a formatted date' do
      expect(subject.departure_date).to eq('Wed, Nov 12, 2014')
    end
  end

  describe '#date' do
    subject { described_class.new('', '', date: date) }

    context 'on an instance with a date' do
      let(:date) { Date.parse('2014-11-12') }

      it 'returns the set date' do
        expect(subject.date).to eq(date)
      end
    end

    context 'on an instance without a date' do
      let(:date) { nil }
      let(:stubbed_date) { Date.parse('2014-11-12') }

      it 'returns the current date' do
        expect(Date).to receive(:today).and_return(stubbed_date)
        expect(subject.date).to eq(stubbed_date)
      end
    end
  end

  describe '#total_pages' do
    context 'when only one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }

      it 'returns 1', :vcr do
        expect(subject.total_pages).to eq(1)
      end
    end

    context 'when more than one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-27')) }

      it 'returns 2', :vcr do
        expect(subject.total_pages).to eq(2)
      end
    end
  end

  describe '#session_id' do
    subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }

    it 'pulls the session id from the cookies', :vcr do
      expect(subject.session_id).to eq('0000kOxPFtO4mDoSiIGk2yzvAz6:187j4dq9a')
    end
  end

  describe '#page' do
    context 'when Excon raises' do
      subject { described_class.new('', '') }

      it 'raises a TrainFetcher::Error on a client error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ClientError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'raises a TrainFetcher::Error on a server error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ServerError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
# Specs for Amtrak::TrainFetcher::TrainPage.get. The 'when it works' example
# keeps its original description; the two error examples, which stub Excon,
# were renamed so that each has a unique description.
describe Amtrak::TrainFetcher::TrainPage do
  subject { described_class.get(session_id, page) }

  # Shared by every context below.
  let(:session_id) { '0000ITjAiGUWm-mBBpAyeGZ43Rv:187j4ddrq' }
  let(:page) { '1' }

  describe '#get', :vcr do
    context 'when it works' do
      it 'includes various classes' do
        expect(subject).to include('route_num')
        expect(subject).to include('scheduled')
        expect(subject).to include('act_est')
      end
    end

    context 'when Excon raises an error' do
      it 'reraises a client error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ClientError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a server error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ServerError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end