amtrak 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +6 -0
- data/.travis.yml +5 -0
- data/Gemfile +3 -0
- data/README.markdown +3 -0
- data/Rakefile +23 -0
- data/amtrak.gemspec +29 -0
- data/lib/amtrak.rb +14 -0
- data/lib/amtrak/train_fetcher.rb +39 -0
- data/lib/amtrak/train_fetcher/main_page.rb +70 -0
- data/lib/amtrak/train_fetcher/train_page.rb +41 -0
- data/lib/amtrak/train_parser.rb +90 -0
- data/lib/amtrak/version.rb +6 -0
- data/spec/amtrak/train_fetcher/main_page_spec.rb +75 -0
- data/spec/amtrak/train_fetcher/train_page_spec.rb +37 -0
- data/spec/amtrak/train_fetcher_spec.rb +59 -0
- data/spec/amtrak/train_parser_spec.rb +230 -0
- data/spec/amtrak_spec.rb +243 -0
- data/spec/fixtures/html/pvd_to_bby.html +2401 -0
- data/spec/fixtures/vcr/Amtrak/_get/returns_a_list_of_train_times.yml +1469 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/does_the_same_as_get.yml +1467 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_invalid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +843 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_train_stations/includes_various_classes.yml +1467 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_an_invalid_date_and_valid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +838 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_session_id/pulls_the_session_id_from_the_cookies.yml +1082 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_1.yml +1076 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_2.yml +1074 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_only_one_page_exists_on_the_website/returns_1.yml +1082 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_TrainPage/_get/when_it_works/includes_various_classes.yml +247 -0
- data/spec/spec_helper.rb +18 -0
- data/spec/support/vcr.rb +10 -0
- metadata +206 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 893ffef1c08c1763b3a34c0f70d9dd303f89385f
|
4
|
+
data.tar.gz: c2e940a65b8bf344359b2ca25a15b0c853776c3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f98144098bbe5944ababe3817acc7713a838279dc63b1a9cacf5af17a34584faaf879c8ebce826a462867ddab44bce7fb5db5e1fca9ad4acce5ad05a60edf442
|
7
|
+
data.tar.gz: 42ce40b14aeb3341e0d46e20512b2275fb80e02d51cdf03ff475314974084db1190244c340f79cf6c175add675cc2d261e0d3a28bf1cd6249008c109cc10dd84
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Names of the tasks a bare `rake` invocation runs. Each optional tool below
# appends its own task only when its rake integration can be loaded.
default_tasks = []

# Lint task: registered only when the rubocop rake task can be required.
begin
  require 'rubocop/rake_task'

  RuboCop::RakeTask.new(:rubocop) do |lint|
    lint.patterns = %w(lib/**/*.rb spec/**/*.rb)
  end
  default_tasks.push(:rubocop)
rescue LoadError
  # Deliberately ignored: without rubocop the lint task is simply left out.
end

# Spec task: registered only when the rspec rake task can be required.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec)
  default_tasks.push(:spec)
rescue LoadError
  # Deliberately ignored: without rspec the spec task is simply left out.
end

task default: default_tasks
|
data/amtrak.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'amtrak/version'
|
5
|
+
|
6
|
+
# Gem specification for `amtrak`: metadata, packaged files and dependencies.
Gem::Specification.new do |spec|
  spec.name = 'amtrak'
  spec.version = Amtrak::Version::VERSION
  spec.authors = ['Todd Lunter']
  spec.email = ['tlunter@gmail.com']
  spec.description = %q{Scrape train times from Amtrak.com}
  spec.summary = %q{Scrape train times from Amtrak.com}
  spec.homepage = 'https://github.com/tlunter/amtrak_gem'
  spec.license = 'MIT'

  # Package exactly the files tracked by git.
  spec.files = `git ls-files`.split($/)
  # Executables are the basenames of tracked files under bin/. The pattern
  # must be a Regexp (%r{}): a plain String passed to grep is compared for
  # equality and would never match a path.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies.
  spec.add_dependency 'excon', '~> 0.41.0'
  spec.add_dependency 'nokogiri', '~> 1.6.4'

  # Development and test tooling.
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rspec', '~> 3.0.0'
  spec.add_development_dependency 'rubocop', '~> 0.25.0'
  spec.add_development_dependency 'simplecov', '~> 0.8.2'
  spec.add_development_dependency 'vcr', '~> 2.9.2'
end
|
data/lib/amtrak.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Main Amtrak module: namespace and public entry point of the gem.
module Amtrak
  # Base class for the gem's own errors; the fetcher and parser error classes
  # inherit from it, so rescuing Amtrak::Error covers both.
  class Error < StandardError; end

  # Fetches every result page for a trip and parses the trains out of each.
  #
  # @param from [#to_s] origin station, passed through to the fetcher
  # @param to [#to_s] destination station, passed through to the fetcher
  # @param date [Date, nil] travel date; nil lets the fetcher pick its default
  # @return [Array<Hash>] one hash per train, in page order, as produced by
  #   Amtrak::TrainParser.parse
  def self.get(from, to, date: nil)
    # flat_map concatenates the per-page arrays without an intermediate
    # array-of-arrays.
    Amtrak::TrainFetcher.get(from, to, date: date).flat_map do |html|
      Amtrak::TrainParser.parse(html)
    end
  end
end
|
11
|
+
|
12
|
+
require 'amtrak/train_parser'
|
13
|
+
require 'amtrak/train_fetcher'
|
14
|
+
require 'amtrak/version'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Amtrak
  # Service for getting train time HTML page from the Amtrak website
  class TrainFetcher
    # Raised when a request to the Amtrak website fails.
    class Error < Amtrak::Error; end

    # Convenience wrapper: builds a fetcher and returns its pages.
    #
    # Keywords are forwarded explicitly. Collecting `date:` into `*args`
    # would hand it to `new` as a positional Hash, which raises
    # ArgumentError on Ruby 3 and later.
    #
    # @return [Array] see #get
    def self.get(*args, **options)
      new(*args, **options).get
    end

    attr_reader :from, :to, :date

    # @param from [#to_s] origin station
    # @param to [#to_s] destination station
    # @param date [Date, nil] travel date; nil is passed on to MainPage as is
    def initialize(from, to, date: nil)
      @from = from
      @to = to
      @date = date
    end

    # Requests each result page, from 1 up to #total_pages, using the session
    # obtained from the first page.
    #
    # @return [Array] one TrainPage.get result per page, in page order
    def get
      (1..total_pages).map do |page|
        Amtrak::TrainFetcher::TrainPage.get(session_id, page)
      end
    end

    # Memoized MainPage for this trip; source of the session id and the page
    # count.
    def first_page
      @first_page ||= Amtrak::TrainFetcher::MainPage.new(from, to, date: date)
    end

    # Session id reported by the first page, memoized.
    def session_id
      @session_id ||= first_page.session_id
    end

    # Number of result pages reported by the first page, memoized.
    def total_pages
      @total_pages ||= first_page.total_pages
    end
  end
end
|
37
|
+
|
38
|
+
require 'amtrak/train_fetcher/main_page'
|
39
|
+
require 'amtrak/train_fetcher/train_page'
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'excon'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Amtrak
  class TrainFetcher
    # Service for getting train time results/cookies from the Amtrak website
    class MainPage
      attr_reader :from, :to

      # @param from [#to_s] origin station, sent as `wdf_origin`
      # @param to [#to_s] destination station, sent as `wdf_destination`
      # @param date [Date, nil] travel date; nil falls back to Date.today
      def initialize(from, to, date: nil)
        @from = from
        @to = to
        @date = date
      end

      # Posts the search form once and memoizes the response.
      #
      # @return [Object] the Excon response
      # @raise [Amtrak::TrainFetcher::Error] when Excon raises a client or
      #   server error (any status other than 200 is rejected via `expects`)
      def page
        @page ||= Excon.post(
          'http://tickets.amtrak.com/itd/amtrak',
          headers: headers,
          body: URI.encode_www_form(body),
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      # Request headers for the form post.
      def headers
        { 'Content-Type' => 'application/x-www-form-urlencoded' }
      end

      # Form fields of the train status search. The keys and XPath-like values
      # are sent verbatim, so they must not be reformatted.
      # rubocop:disable all
      def body
        {
          "_handler=amtrak.presentation.handler.request.rail.AmtrakRailTrainStatusSearchRequestHandler/_xpath=/sessionWorkflow/productWorkflow[@product='Rail']" => '',
          "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate.date" => departure_date,
          'requestor' => 'amtrak.presentation.handler.page.rail.AmtrakRailGetTrainStatusPageHandler',
          'xwdf_trainNumber' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/segmentRequirements[1]/serviceCode",
          'wdf_trainNumber' => 'optional',
          'xwdf_SortBy' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate/@radioSelect",
          'wdf_SortBy' => 'arrivalTime',
          'xwdf_origin' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/departLocation/search",
          'wdf_origin' => from.to_s,
          'xwdf_destination' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/arriveLocation/search",
          'wdf_destination' => to.to_s,
        }
      end
      # rubocop:enable all

      # Travel date formatted for the form, e.g. "Wed, Nov 12, 2014".
      def departure_date
        date.strftime('%a, %b %-d, %Y')
      end

      # The date given at construction, or today's date when none was given.
      def date
        @date ||= Date.today
      end

      # Extracts the JSESSIONID value from the response's Set-Cookie header.
      #
      # @return [String] the session id
      # @raise [Amtrak::TrainFetcher::Error] when the header is missing or
      #   carries no JSESSIONID, instead of failing with NoMethodError on nil
      def session_id
        match = page.headers['Set-Cookie'].to_s.match(/JSESSIONID=([^;]*)/)
        unless match
          fail Amtrak::TrainFetcher::Error, 'Response has no JSESSIONID cookie'
        end
        match[1]
      end

      # Number of result pages: the listing count divided by 10, rounded up.
      # NOTE(review): 10 is presumably the site's listings-per-page -- confirm.
      def total_pages
        (Float(extract_listing_length) / 10).ceil
      end

      # Reads the `availabilityLength` JavaScript variable from the page body.
      #
      # @return [String] the digits of the listing count
      # @raise [Amtrak::TrainFetcher::Error] when the variable is absent,
      #   instead of failing with NoMethodError on nil
      def extract_listing_length
        match = page.body.match(/var availabilityLength = '(\d+)';/)
        unless match
          fail Amtrak::TrainFetcher::Error,
               'Response has no availabilityLength value'
        end
        match[1]
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'excon'
|
2
|
+
|
3
|
+
module Amtrak
  class TrainFetcher
    # Service for getting per page train time HTML from the Amtrak website
    class TrainPage
      # Convenience wrapper: builds a TrainPage and returns its body.
      #
      # @return [Object] see #get
      def self.get(*args)
        new(*args).get
      end

      attr_reader :session_id, :page

      # @param session_id [String] value sent back as the JSESSIONID cookie
      # @param page [Object] value sent as the `_trainstatuspage` query param
      def initialize(session_id, page)
        @session_id = session_id
        @page = page
      end

      # @return [Object] the body of the (memoized) response
      # @raise [Amtrak::TrainFetcher::Error] see #request
      def get
        request.body
      end

      # Performs the GET once and memoizes the response.
      #
      # `expects: [200]` mirrors MainPage#page. Without an expectation Excon
      # does not raise on HTTP error statuses, so the rescue below would
      # never see a failed request.
      #
      # @return [Object] the Excon response
      # @raise [Amtrak::TrainFetcher::Error] when Excon raises a client or
      #   server error
      def request
        @request ||= Excon.get(
          'https://tickets.amtrak.com/itd/amtrak/TrainStatusRequest',
          headers: headers,
          query: query,
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      # Request headers carrying the session cookie.
      def headers
        { 'Cookie' => "JSESSIONID=#{session_id}" }
      end

      # Query string selecting which result page to return.
      def query
        { '_trainstatuspage' => page }
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Amtrak
  # Will take in an HTML document as a string and parse out the train schedule
  class TrainParser
    # Error raised when the parser runs into an issue
    class Error < Amtrak::Error; end

    # Convenience wrapper: parses +text+ and returns the trains found.
    #
    # @param text [String] HTML of one results page
    # @return [Array<Hash>] see #parse
    def self.parse(text)
      new(text).parse
    end

    attr_reader :document

    # @param document [String] HTML to parse; stored as a Nokogiri document
    def initialize(document)
      @document = Nokogiri::HTML.parse(document)
    end

    # Builds one hash per train from consecutive pairs of result rows: the
    # first row of a pair is the departure, the second the arrival.
    #
    # The memoized row list is iterated, not consumed, so calling #parse
    # again on the same instance returns the same trains rather than [].
    #
    # @return [Array<Hash>] hashes with :number, :departure and :arrival
    # @raise [RuntimeError] when no rows are found, or a lookup matches more
    #   than one node
    # @raise [Amtrak::TrainParser::Error] on Nokogiri::SyntaxError
    def parse
      train_nodes.each_slice(2).map do |departure, arrival|
        {
          number: parse_train_number(departure),
          departure: parse_train(departure),
          arrival: parse_train(arrival)
        }
      end
    rescue Nokogiri::SyntaxError => ex
      raise Amtrak::TrainParser::Error, "#{ex.class} #{ex.message}"
    end

    # All rows whose class attribute contains `status_result`, memoized as an
    # Array. Fails with 'No trains found' when there are none.
    def train_nodes
      @train_nodes ||= document.search(
        "//tr[contains(@class, 'status_result')]"
      ).tap { |results| fail 'No trains found' unless results.count > 0 }.to_a
    end

    # Train number from the row's `route_num` cell, as an Integer (0 when the
    # text is missing or not numeric, per String#to_i).
    def parse_train_number(node)
      find!(
        node, ".//th[@class='service']/div[@class='route_num']/text()"
      ).to_s.to_i
    end

    # Scheduled and actual/estimated date and time strings of one row.
    #
    # @return [Hash] :scheduled_date, :estimated_date, :scheduled_time and
    #   :estimated_time, all Strings
    def parse_train(node) # rubocop:disable Metrics/MethodLength
      scheduled_date = find!(
        node, ".//td[@class='scheduled']/div[@class='date']/text()"
      ).to_s
      estimated_date = find!(
        node, ".//td[@class='act_est']/div[@class='date']/text()"
      ).to_s
      scheduled_time = remove_parentheses(
        find!(node, ".//td[@class='scheduled']/div[@class='time']/text()").to_s
      )
      estimated_time = remove_parentheses(
        find!(node, ".//td[@class='act_est']/div[@class='time']/text()").to_s
      )

      {
        scheduled_date: scheduled_date,
        estimated_date: estimated_date,
        scheduled_time: scheduled_time,
        estimated_time: estimated_time
      }
    end

    # Searches +node+ with +xpath+ and fails when more than one node matches.
    # Zero matches are allowed and returned as an empty result.
    def find!(node, xpath)
      node.search(xpath).tap { |rs| fail "#{rs.count} results" if rs.count > 1 }
    end

    # Combines a date and a time string into a DateTime, or returns nil when
    # either is nil.
    def make_datetime(date, time)
      return if date.nil? || time.nil?

      DateTime.parse("#{date} at #{time}")
    end

    # Returns the text inside the first pair of parentheses in +time_string+,
    # or the string unchanged when it has none.
    def remove_parentheses(time_string)
      matches = /\(([^)]+)\)/.match(time_string)
      matches ? matches[1] : time_string
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for MainPage: date handling, page count, session id and error wrapping.
describe Amtrak::TrainFetcher::MainPage do
  describe '#departure_date' do
    subject { described_class.new('', '', date: date) }
    let(:date) { Date.parse('2014-11-12') }

    it 'prints out a formatted date' do
      expect(subject.departure_date).to eq('Wed, Nov 12, 2014')
    end
  end

  describe '#date' do
    subject { described_class.new('', '', date: date) }

    context 'on an instance with a date' do
      let(:date) { Date.parse('2014-11-12') }

      it 'returns the set date' do
        expect(subject.date).to eq(date)
      end
    end

    context 'on an instance without a date' do
      let(:date) { nil }
      let(:stubbed_date) { Date.parse('2014-11-12') }

      it 'returns the current date' do
        expect(Date).to receive(:today).and_return(stubbed_date)
        expect(subject.date).to eq(stubbed_date)
      end
    end
  end

  # The :vcr example descriptions below name the recorded cassettes, so they
  # must stay as they are.
  describe '#total_pages' do
    context 'when only one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }

      it 'returns 1', :vcr do
        expect(subject.total_pages).to eq(1)
      end
    end

    context 'when more than one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-27')) }

      it 'returns 2', :vcr do
        expect(subject.total_pages).to eq(2)
      end
    end
  end

  describe '#session_id' do
    subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }
    it 'pulls the session id from the cookies', :vcr do
      expect(subject.session_id).to eq('0000kOxPFtO4mDoSiIGk2yzvAz6:187j4dq9a')
    end
  end

  describe '#page' do
    context 'when Excon raises' do
      subject { described_class.new('', '') }

      # Distinct descriptions, one per Excon error class, so a failure
      # identifies which case broke.
      it 'reraises a ClientError as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ClientError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a ServerError as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ServerError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for TrainPage: successful fetch and wrapping of Excon errors.
describe Amtrak::TrainFetcher::TrainPage do
  subject { described_class.get(session_id, page) }

  describe '#get', :vcr do
    # Both contexts use the same request parameters.
    let(:session_id) { '0000ITjAiGUWm-mBBpAyeGZ43Rv:187j4ddrq' }
    let(:page) { '1' }

    context 'when it works' do
      it 'includes various classes' do
        expect(subject).to include('route_num')
        expect(subject).to include('scheduled')
        expect(subject).to include('act_est')
      end
    end

    context 'when Excon raises an error' do
      # Distinct descriptions, one per Excon error class, so a failure
      # identifies which case broke.
      it 'reraises a ClientError as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ClientError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a ServerError as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ServerError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
|