amtrak 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +6 -0
- data/.travis.yml +5 -0
- data/Gemfile +3 -0
- data/README.markdown +3 -0
- data/Rakefile +23 -0
- data/amtrak.gemspec +29 -0
- data/lib/amtrak.rb +14 -0
- data/lib/amtrak/train_fetcher.rb +39 -0
- data/lib/amtrak/train_fetcher/main_page.rb +70 -0
- data/lib/amtrak/train_fetcher/train_page.rb +41 -0
- data/lib/amtrak/train_parser.rb +90 -0
- data/lib/amtrak/version.rb +6 -0
- data/spec/amtrak/train_fetcher/main_page_spec.rb +75 -0
- data/spec/amtrak/train_fetcher/train_page_spec.rb +37 -0
- data/spec/amtrak/train_fetcher_spec.rb +59 -0
- data/spec/amtrak/train_parser_spec.rb +230 -0
- data/spec/amtrak_spec.rb +243 -0
- data/spec/fixtures/html/pvd_to_bby.html +2401 -0
- data/spec/fixtures/vcr/Amtrak/_get/returns_a_list_of_train_times.yml +1469 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/does_the_same_as_get.yml +1467 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_invalid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +843 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_train_stations/includes_various_classes.yml +1467 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_an_invalid_date_and_valid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +838 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_session_id/pulls_the_session_id_from_the_cookies.yml +1082 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_1.yml +1076 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_2.yml +1074 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_only_one_page_exists_on_the_website/returns_1.yml +1082 -0
- data/spec/fixtures/vcr/Amtrak_TrainFetcher_TrainPage/_get/when_it_works/includes_various_classes.yml +247 -0
- data/spec/spec_helper.rb +18 -0
- data/spec/support/vcr.rb +10 -0
- metadata +206 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 893ffef1c08c1763b3a34c0f70d9dd303f89385f
|
4
|
+
data.tar.gz: c2e940a65b8bf344359b2ca25a15b0c853776c3a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f98144098bbe5944ababe3817acc7713a838279dc63b1a9cacf5af17a34584faaf879c8ebce826a462867ddab44bce7fb5db5e1fca9ad4acce5ad05a60edf442
|
7
|
+
data.tar.gz: 42ce40b14aeb3341e0d46e20512b2275fb80e02d51cdf03ff475314974084db1190244c340f79cf6c175add675cc2d261e0d3a28bf1cd6249008c109cc10dd84
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rake'
|
2
|
+
|
3
|
+
# Tasks run by a bare `rake`. Each optional tool below adds itself to this
# list only when its gem can be loaded.
default_tasks = []

# Lint task covering the library and spec sources.
begin
  require 'rubocop/rake_task'

  RuboCop::RakeTask.new(:rubocop) do |lint|
    lint.patterns = %w(lib/**/*.rb spec/**/*.rb)
  end
  default_tasks.push(:rubocop)
rescue LoadError
  # rubocop could not be loaded: the :rubocop task is simply not defined
end

# Spec task.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec)
  default_tasks.push(:spec)
rescue LoadError
  # rspec could not be loaded: the :spec task is simply not defined
end

task default: default_tasks
|
data/amtrak.gemspec
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# Put lib/ on the load path so the version constant can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'amtrak/version'

# Gem specification for the amtrak gem.
Gem::Specification.new do |spec|
  spec.name = 'amtrak'
  spec.version = Amtrak::Version::VERSION
  spec.authors = ['Todd Lunter']
  spec.email = ['tlunter@gmail.com']
  spec.description = %q{Scrape train times from Amtrak.com}
  spec.summary = %q{Scrape train times from Amtrak.com}
  spec.homepage = 'https://github.com/tlunter/amtrak_gem'
  spec.license = 'MIT'

  # Package every file tracked by git.
  spec.files = `git ls-files`.split($/)
  # Executables are the basenames of the files under bin/. The pattern has to
  # be a Regexp: grep with a String only selects entries equal to that string,
  # so no bin/ path would ever be picked up.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'excon', '~> 0.41.0'
  spec.add_dependency 'nokogiri', '~> 1.6.4'

  # Development-only dependencies
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rspec', '~> 3.0.0'
  spec.add_development_dependency 'rubocop', '~> 0.25.0'
  spec.add_development_dependency 'simplecov', '~> 0.8.2'
  spec.add_development_dependency 'vcr', '~> 2.9.2'
end
|
data/lib/amtrak.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Main Amtrak module
module Amtrak
  # Base error class; TrainFetcher::Error and TrainParser::Error inherit from it.
  class Error < StandardError; end

  # Fetches every result page for a trip and parses them into one flat list.
  #
  # @param from [Object] origin station, handed to Amtrak::TrainFetcher.get
  # @param to [Object] destination station, handed to Amtrak::TrainFetcher.get
  # @param date [Object, nil] travel date, forwarded unchanged as `date:`
  # @return [Array] the results of Amtrak::TrainParser.parse for each fetched
  #   page, concatenated in page order
  def self.get(from, to, date: nil)
    # flat_map concatenates the per-page arrays without building the
    # intermediate array-of-arrays that map + flatten would.
    Amtrak::TrainFetcher.get(from, to, date: date).flat_map do |html|
      Amtrak::TrainParser.parse(html)
    end
  end
end
|
11
|
+
|
12
|
+
require 'amtrak/train_parser'
|
13
|
+
require 'amtrak/train_fetcher'
|
14
|
+
require 'amtrak/version'
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Amtrak
  # Service for getting train time HTML page from the Amtrak website
  class TrainFetcher
    # Error raised when a request to the Amtrak website fails
    class Error < Amtrak::Error; end

    # Builds a fetcher and returns its pages in one call.
    #
    # Keywords are forwarded explicitly. With a bare `*args` splat the
    # `date:` keyword is captured as a trailing positional Hash, which Ruby 3
    # no longer converts back into keywords, so `new` would raise
    # ArgumentError.
    #
    # @return [Array] see #get
    def self.get(*args, **kwargs)
      new(*args, **kwargs).get
    end

    attr_reader :from, :to, :date

    # @param from [Object] origin station, passed on to MainPage
    # @param to [Object] destination station, passed on to MainPage
    # @param date [Object, nil] travel date, passed on to MainPage as `date:`
    def initialize(from, to, date: nil)
      @from = from
      @to = to
      @date = date
    end

    # Fetches every result page, numbered from 1 to #total_pages.
    #
    # @return [Array] one TrainPage.get result per page, in page order
    def get
      (1..total_pages).map do |page|
        Amtrak::TrainFetcher::TrainPage.get(session_id, page)
      end
    end

    # Memoized MainPage; the session id and the page count both come from it.
    def first_page
      @first_page ||= Amtrak::TrainFetcher::MainPage.new(from, to, date: date)
    end

    # Session id reported by the first page, reused for every TrainPage request.
    def session_id
      @session_id ||= first_page.session_id
    end

    # Number of result pages reported by the first page.
    def total_pages
      @total_pages ||= first_page.total_pages
    end
  end
end
|
37
|
+
|
38
|
+
require 'amtrak/train_fetcher/main_page'
|
39
|
+
require 'amtrak/train_fetcher/train_page'
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'excon'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Amtrak
  class TrainFetcher
    # Service for getting train time results/cookies from the Amtrak website
    class MainPage
      attr_reader :from, :to

      # @param from [#to_s] origin station, sent as the `wdf_origin` form field
      # @param to [#to_s] destination station, sent as `wdf_destination`
      # @param date [#strftime, nil] travel date; nil falls back to Date.today
      def initialize(from, to, date: nil)
        @from = from
        @to = to
        @date = date
      end

      # Posts the train status search form once and memoizes the response.
      #
      # NOTE(review): this endpoint is plain http while TrainPage requests
      # https. Confirm before changing it; recorded VCR cassettes may depend
      # on the exact URL.
      #
      # @return [Object] the Excon response (its headers and body are used)
      # @raise [Amtrak::TrainFetcher::Error] when Excon raises a client or
      #   server error (any status other than 200 is rejected by `expects`)
      def page
        @page ||= Excon.post(
          'http://tickets.amtrak.com/itd/amtrak',
          headers: headers,
          body: URI.encode_www_form(body),
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      # Request headers: the body is sent form-encoded.
      def headers
        { 'Content-Type' => 'application/x-www-form-urlencoded' }
      end

      # Form fields of the search request; only the departure date, origin
      # and destination vary between requests.
      # rubocop:disable all
      def body
        {
          "_handler=amtrak.presentation.handler.request.rail.AmtrakRailTrainStatusSearchRequestHandler/_xpath=/sessionWorkflow/productWorkflow[@product='Rail']" => '',
          "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate.date" => departure_date,
          'requestor' => 'amtrak.presentation.handler.page.rail.AmtrakRailGetTrainStatusPageHandler',
          'xwdf_trainNumber' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/segmentRequirements[1]/serviceCode",
          'wdf_trainNumber' => 'optional',
          'xwdf_SortBy' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate/@radioSelect",
          'wdf_SortBy' => 'arrivalTime',
          'xwdf_origin' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/departLocation/search",
          'wdf_origin' => from.to_s,
          'xwdf_destination' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/arriveLocation/search",
          'wdf_destination' => to.to_s,
        }
      end
      # rubocop:enable all

      # Travel date formatted for the form, e.g. 'Wed, Nov 12, 2014'.
      def departure_date
        date.strftime('%a, %b %-d, %Y')
      end

      # The date given to the constructor, or today's date when none was given.
      def date
        @date ||= Date.today
      end

      # Extracts the JSESSIONID value from the response's Set-Cookie header.
      #
      # @return [String] the session id
      # @raise [Amtrak::TrainFetcher::Error] when the header is missing or
      #   carries no JSESSIONID (previously a NoMethodError on nil)
      def session_id
        cookies = page.headers['Set-Cookie'].to_s
        found = cookies.match(/JSESSIONID=([^;]*)/)
        fail Amtrak::TrainFetcher::Error, 'No JSESSIONID cookie in response' unless found

        found[1]
      end

      # Number of result pages: the listing length divided by 10, rounded up.
      def total_pages
        (Float(extract_listing_length) / 10).ceil
      end

      # Reads the `availabilityLength` value embedded in the page's script.
      #
      # @return [String] the digits found in the page
      # @raise [Amtrak::TrainFetcher::Error] when the page does not contain
      #   the marker (previously a NoMethodError on nil)
      def extract_listing_length
        found = page.body.to_s.match(/var availabilityLength = '(\d+)';/)
        fail Amtrak::TrainFetcher::Error, 'No availabilityLength in response' unless found

        found[1]
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'excon'
|
2
|
+
|
3
|
+
module Amtrak
  class TrainFetcher
    # Service for getting per page train time HTML from the Amtrak website
    class TrainPage
      # Builds a TrainPage and returns its HTML in one call.
      #
      # @return [Object] see #get
      def self.get(*args)
        new(*args).get
      end

      attr_reader :session_id, :page

      # @param session_id [Object] value sent back as the JSESSIONID cookie
      # @param page [Object] value sent as the `_trainstatuspage` query parameter
      def initialize(session_id, page)
        @session_id = session_id
        @page = page
      end

      # @return [Object] the body of the response for this page
      # @raise [Amtrak::TrainFetcher::Error] see #request
      def get
        request.body
      end

      # Performs the GET once and memoizes the response.
      #
      # `expects: [200]` mirrors MainPage. Without it the rescue below can
      # never see an HTTP status error, and a non-200 body would be handed
      # back as if it were train HTML.
      #
      # @raise [Amtrak::TrainFetcher::Error] when Excon raises a client or
      #   server error
      def request
        @request ||= Excon.get(
          'https://tickets.amtrak.com/itd/amtrak/TrainStatusRequest',
          headers: headers,
          query: query,
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      # Request headers: the session id is sent as the JSESSIONID cookie.
      def headers
        { 'Cookie' => "JSESSIONID=#{session_id}" }
      end

      # Query string parameters: selects which result page to return.
      def query
        { '_trainstatuspage' => page }
      end
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module Amtrak
  # Will take in an HTML document as a string and parse out the train schedule
  class TrainParser
    # Error raised when the parser runs into an issue
    class Error < Amtrak::Error; end

    # Parses +text+ in one call; see #parse.
    def self.parse(text)
      new(text).parse
    end

    attr_reader :document

    # @param document [String] HTML to parse with Nokogiri
    def initialize(document)
      @document = Nokogiri::HTML.parse(document)
    end

    # Builds one entry per train from consecutive pairs of result rows: the
    # first row of a pair is read as the departure, the second as the arrival.
    #
    # The rows are walked with each_slice instead of being shifted off the
    # memoized list, so calling parse again returns the same trains rather
    # than an empty array.
    #
    # @return [Array<Hash>] hashes with :number, :departure and :arrival
    # @raise [Amtrak::TrainParser::Error] when Nokogiri reports a syntax error
    # @raise [RuntimeError] when no result rows exist or a lookup is ambiguous
    def parse
      train_nodes.each_slice(2).map do |departure, arrival|
        {
          number: parse_train_number(departure),
          departure: parse_train(departure),
          arrival: parse_train(arrival)
        }
      end
    rescue Nokogiri::SyntaxError => ex
      raise Amtrak::TrainParser::Error, "#{ex.class} #{ex.message}"
    end

    # Memoized array of the <tr> rows whose class contains 'status_result'.
    #
    # @raise [RuntimeError] 'No trains found' when the document has no such row
    def train_nodes
      @train_nodes ||= document.search(
        "//tr[contains(@class, 'status_result')]"
      ).tap { |results| fail 'No trains found' unless results.count > 0 }.to_a
    end

    # Reads the route number text of a row and converts it with to_i.
    def parse_train_number(node)
      find!(
        node, ".//th[@class='service']/div[@class='route_num']/text()"
      ).to_s.to_i
    end

    # Extracts the scheduled and actual/estimated date and time texts of a row.
    #
    # @return [Hash] :scheduled_date, :estimated_date, :scheduled_time and
    #   :estimated_time, all as strings
    def parse_train(node) # rubocop:disable Metrics/MethodLength
      scheduled_date = find!(
        node, ".//td[@class='scheduled']/div[@class='date']/text()"
      ).to_s
      estimated_date = find!(
        node, ".//td[@class='act_est']/div[@class='date']/text()"
      ).to_s
      scheduled_time = remove_parentheses(
        find!(node, ".//td[@class='scheduled']/div[@class='time']/text()").to_s
      )
      estimated_time = remove_parentheses(
        find!(node, ".//td[@class='act_est']/div[@class='time']/text()").to_s
      )

      {
        scheduled_date: scheduled_date,
        estimated_date: estimated_date,
        scheduled_time: scheduled_time,
        estimated_time: estimated_time
      }
    end

    # Searches +node+ with +xpath+ and returns the result set.
    #
    # @raise [RuntimeError] when more than one node matches; zero matches are
    #   allowed and returned as an empty set
    def find!(node, xpath)
      node.search(xpath).tap { |rs| fail "#{rs.count} results" if rs.count > 1 }
    end

    # Combines a date and a time string into a DateTime, or returns nil when
    # either is nil.
    #
    # NOTE(review): nothing visible in this file calls this method, and
    # DateTime needs `require 'date'`, which this file does not load itself.
    def make_datetime(date, time)
      return if date.nil? || time.nil?

      DateTime.parse("#{date} at #{time}")
    end

    # Returns the text inside the first pair of parentheses in +time_string+,
    # or +time_string+ unchanged when it contains none.
    def remove_parentheses(time_string)
      matches = /\(([^)]+)\)/.match(time_string)
      matches ? matches[1] : time_string
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for MainPage. Examples tagged :vcr keep their exact descriptions
# because the recorded cassette paths are derived from them.
describe Amtrak::TrainFetcher::MainPage do
  describe '#departure_date' do
    subject { described_class.new('', '', date: date) }
    let(:date) { Date.parse('2014-11-12') }

    it 'prints out a formatted date' do
      expect(subject.departure_date).to eq('Wed, Nov 12, 2014')
    end
  end

  describe '#date' do
    subject { described_class.new('', '', date: date) }

    context 'on an instance with a date' do
      let(:date) { Date.parse('2014-11-12') }

      it 'returns the set date' do
        expect(subject.date).to eq(date)
      end
    end

    context 'on an instance without a date' do
      let(:date) { nil }
      let(:stubbed_date) { Date.parse('2014-11-12') }

      it 'returns the current date' do
        expect(Date).to receive(:today).and_return(stubbed_date)
        expect(subject.date).to eq(stubbed_date)
      end
    end
  end

  describe '#total_pages' do
    context 'when only one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }

      it 'returns 1', :vcr do
        expect(subject.total_pages).to eq(1)
      end
    end

    context 'when more than one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-27')) }

      it 'returns 2', :vcr do
        expect(subject.total_pages).to eq(2)
      end
    end
  end

  describe '#session_id' do
    subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }
    it 'pulls the session id from the cookies', :vcr do
      expect(subject.session_id).to eq('0000kOxPFtO4mDoSiIGk2yzvAz6:187j4dq9a')
    end
  end

  describe '#page' do
    context 'when Excon raises' do
      subject { described_class.new('', '') }

      # The two examples used to share one description, which made a failure
      # report ambiguous; each now names the Excon error it covers.
      it 'reraises a client error as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ClientError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a server error as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ServerError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for TrainPage.get. The 'when it works' group and example keep their
# exact descriptions because the recorded cassette path is derived from them.
describe Amtrak::TrainFetcher::TrainPage do
  subject { described_class.get(session_id, page) }

  describe '#get', :vcr do
    # Both contexts use the same session id and page number.
    let(:session_id) { '0000ITjAiGUWm-mBBpAyeGZ43Rv:187j4ddrq' }
    let(:page) { '1' }

    context 'when it works' do
      it 'includes various classes' do
        expect(subject).to include('route_num')
        expect(subject).to include('scheduled')
        expect(subject).to include('act_est')
      end
    end

    context 'when Excon raises an error' do
      # The two examples used to share one description, which made a failure
      # report ambiguous; each now names the Excon error it covers.
      it 'reraises a client error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ClientError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a server error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ServerError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
|