amtrak 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rubocop.yml +6 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +3 -0
  6. data/README.markdown +3 -0
  7. data/Rakefile +23 -0
  8. data/amtrak.gemspec +29 -0
  9. data/lib/amtrak.rb +14 -0
  10. data/lib/amtrak/train_fetcher.rb +39 -0
  11. data/lib/amtrak/train_fetcher/main_page.rb +70 -0
  12. data/lib/amtrak/train_fetcher/train_page.rb +41 -0
  13. data/lib/amtrak/train_parser.rb +90 -0
  14. data/lib/amtrak/version.rb +6 -0
  15. data/spec/amtrak/train_fetcher/main_page_spec.rb +75 -0
  16. data/spec/amtrak/train_fetcher/train_page_spec.rb +37 -0
  17. data/spec/amtrak/train_fetcher_spec.rb +59 -0
  18. data/spec/amtrak/train_parser_spec.rb +230 -0
  19. data/spec/amtrak_spec.rb +243 -0
  20. data/spec/fixtures/html/pvd_to_bby.html +2401 -0
  21. data/spec/fixtures/vcr/Amtrak/_get/returns_a_list_of_train_times.yml +1469 -0
  22. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/does_the_same_as_get.yml +1467 -0
  23. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_invalid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +843 -0
  24. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_train_stations/includes_various_classes.yml +1467 -0
  25. data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_an_invalid_date_and_valid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +838 -0
  26. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_session_id/pulls_the_session_id_from_the_cookies.yml +1082 -0
  27. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_1.yml +1076 -0
  28. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_2.yml +1074 -0
  29. data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_only_one_page_exists_on_the_website/returns_1.yml +1082 -0
  30. data/spec/fixtures/vcr/Amtrak_TrainFetcher_TrainPage/_get/when_it_works/includes_various_classes.yml +247 -0
  31. data/spec/spec_helper.rb +18 -0
  32. data/spec/support/vcr.rb +10 -0
  33. metadata +206 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 893ffef1c08c1763b3a34c0f70d9dd303f89385f
4
+ data.tar.gz: c2e940a65b8bf344359b2ca25a15b0c853776c3a
5
+ SHA512:
6
+ metadata.gz: f98144098bbe5944ababe3817acc7713a838279dc63b1a9cacf5af17a34584faaf879c8ebce826a462867ddab44bce7fb5db5e1fca9ad4acce5ad05a60edf442
7
+ data.tar.gz: 42ce40b14aeb3341e0d46e20512b2275fb80e02d51cdf03ff475314974084db1190244c340f79cf6c175add675cc2d261e0d3a28bf1cd6249008c109cc10dd84
@@ -0,0 +1,2 @@
1
+ Gemfile.lock
2
+ coverage
@@ -0,0 +1,6 @@
1
+ Lint/AssignmentInCondition:
2
+ Enabled: false
3
+ Metrics/MethodLength:
4
+ Max: 12
5
+ Metrics/LineLength:
6
+ Max: 100
@@ -0,0 +1,5 @@
1
+ language: ruby
2
+ rvm:
3
+ - 2.0.0
4
+ - 2.1.1
5
+ script: bundle exec rake
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
@@ -0,0 +1,3 @@
1
+ # Amtrak
2
+
3
+ Pulls down the train times from the Amtrak website
@@ -0,0 +1,23 @@
1
+ require 'rake'
2
+
# Names of the tasks a bare `rake` invocation runs. Each entry is appended
# only after the library that defines the task has loaded successfully.
default_tasks = []

# Lint task covering the library and spec sources.
begin
  require 'rubocop/rake_task'

  RuboCop::RakeTask.new(:rubocop) do |lint|
    lint.patterns = %w(lib/**/*.rb spec/**/*.rb)
  end
  default_tasks.push(:rubocop)
rescue LoadError
  # The require failed, so the :rubocop task is simply left out.
end

# Spec task.
begin
  require 'rspec/core/rake_task'

  RSpec::Core::RakeTask.new(:spec)
  default_tasks.push(:spec)
rescue LoadError
  # The require failed, so the :spec task is simply left out.
end

task default: default_tasks
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'amtrak/version'
5
+
# Gem specification for the amtrak gem.
Gem::Specification.new do |spec|
  spec.name = 'amtrak'
  spec.version = Amtrak::Version::VERSION
  spec.authors = ['Todd Lunter']
  spec.email = ['tlunter@gmail.com']
  spec.description = %q{Scrape train times from Amtrak.com}
  spec.summary = %q{Scrape train times from Amtrak.com}
  spec.homepage = 'https://github.com/tlunter/amtrak_gem'
  spec.license = 'MIT'

  # The packaged files are whatever git currently tracks.
  spec.files = `git ls-files`.split($/)
  # Executables are the basenames of every tracked file under bin/.
  # The pattern must be a Regexp (%r{}): Array#grep compares with ===, and a
  # plain String would only match an entry equal to the pattern text itself.
  # The block must also declare its parameter before using it.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Runtime dependencies
  spec.add_dependency 'excon', '~> 0.41.0'
  spec.add_dependency 'nokogiri', '~> 1.6.4'

  # Development-only dependencies
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rspec', '~> 3.0.0'
  spec.add_development_dependency 'rubocop', '~> 0.25.0'
  spec.add_development_dependency 'simplecov', '~> 0.8.2'
  spec.add_development_dependency 'vcr', '~> 2.9.2'
end
@@ -0,0 +1,14 @@
# Main Amtrak module
module Amtrak
  # Base error class; the fetcher and parser error classes inherit from it.
  class Error < StandardError; end

  # Fetches every result page for a trip and parses each page into trains.
  #
  # @param from [Object] origin, forwarded to Amtrak::TrainFetcher.get
  # @param to [Object] destination, forwarded to Amtrak::TrainFetcher.get
  # @param date [Object, nil] forwarded as the +date:+ keyword
  # @return [Array] the parsed entries of all pages, in page order
  def self.get(from, to, date: nil)
    # flat_map concatenates the per-page lists in a single pass instead of
    # building a nested array and flattening it afterwards.
    Amtrak::TrainFetcher.get(from, to, date: date).flat_map do |html|
      Amtrak::TrainParser.parse(html)
    end
  end
end
11
+
12
+ require 'amtrak/train_parser'
13
+ require 'amtrak/train_fetcher'
14
+ require 'amtrak/version'
@@ -0,0 +1,39 @@
module Amtrak
  # Service for getting train time HTML page from the Amtrak website
  class TrainFetcher
    # Error raised by the fetcher's page classes when a request fails
    class Error < Amtrak::Error; end

    # Builds a fetcher from the given arguments and returns its pages.
    #
    # Keywords are captured and forwarded explicitly. With a bare `*args`
    # the `date:` keyword is collected into a positional Hash, and on
    # Ruby 3+ that Hash is no longer converted back into keywords, so
    # `new` would raise ArgumentError.
    def self.get(*args, **options)
      new(*args, **options).get
    end

    attr_reader :from, :to, :date

    # @param from [Object] origin, passed on to MainPage
    # @param to [Object] destination, passed on to MainPage
    # @param date [Object, nil] travel date, passed on to MainPage
    def initialize(from, to, date: nil)
      @from = from
      @to = to
      @date = date
    end

    # Requests each result page, from 1 up to total_pages, using the
    # session id taken from the first page.
    #
    # @return [Array] one TrainPage.get result per page
    def get
      (1..total_pages).map do |page|
        Amtrak::TrainFetcher::TrainPage.get(session_id, page)
      end
    end

    # Memoized MainPage; it supplies the session id and the page count.
    def first_page
      @first_page ||= Amtrak::TrainFetcher::MainPage.new(from, to, date: date)
    end

    def session_id
      @session_id ||= first_page.session_id
    end

    def total_pages
      @total_pages ||= first_page.total_pages
    end
  end
end
37
+
38
+ require 'amtrak/train_fetcher/main_page'
39
+ require 'amtrak/train_fetcher/train_page'
@@ -0,0 +1,70 @@
1
+ require 'excon'
2
+ require 'date'
3
+
module Amtrak
  class TrainFetcher
    # Service for getting train time results/cookies from the Amtrak website
    class MainPage
      attr_reader :from, :to

      # @param from [#to_s] origin, sent as the wdf_origin form field
      # @param to [#to_s] destination, sent as the wdf_destination form field
      # @param date [#strftime, nil] travel date; Date.today when nil
      def initialize(from, to, date: nil)
        @from = from
        @to = to
        @date = date
      end

      # Posts the search form once and memoizes the response.
      #
      # NOTE(review): this URL is plain http while TrainPage uses https for
      # the same host. Confirm whether it can be switched; recorded
      # fixtures may depend on the current URL.
      #
      # @raise [Amtrak::TrainFetcher::Error] on an Excon client/server error
      def page
        @page ||= Excon.post(
          'http://tickets.amtrak.com/itd/amtrak',
          headers: headers,
          body: URI.encode_www_form(body),
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      def headers
        { 'Content-Type' => 'application/x-www-form-urlencoded' }
      end

      # Form fields submitted with the search request.
      # rubocop:disable all
      def body
        {
          "_handler=amtrak.presentation.handler.request.rail.AmtrakRailTrainStatusSearchRequestHandler/_xpath=/sessionWorkflow/productWorkflow[@product='Rail']" => '',
          "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate.date" => departure_date,
          'requestor' => 'amtrak.presentation.handler.page.rail.AmtrakRailGetTrainStatusPageHandler',
          'xwdf_trainNumber' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/segmentRequirements[1]/serviceCode",
          'wdf_trainNumber' => 'optional',
          'xwdf_SortBy' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate/@radioSelect",
          'wdf_SortBy' => 'arrivalTime',
          'xwdf_origin' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/departLocation/search",
          'wdf_origin' => from.to_s,
          'xwdf_destination' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/arriveLocation/search",
          'wdf_destination' => to.to_s,
        }
      end
      # rubocop:enable all

      # The travel date formatted like "Wed, Nov 12, 2014".
      def departure_date
        date.strftime('%a, %b %-d, %Y')
      end

      # The date given at construction, or today's date when none was given.
      def date
        @date ||= Date.today
      end

      # Extracts the JSESSIONID value from the response's Set-Cookie header.
      #
      # @raise [Amtrak::TrainFetcher::Error] when the cookie is absent
      def session_id
        found = page.headers['Set-Cookie'].to_s.match(/JSESSIONID=([^;]*)/)
        # Report a missing cookie as a fetcher error rather than letting
        # nil[1] surface as NoMethodError.
        raise Amtrak::TrainFetcher::Error, 'No JSESSIONID cookie found' unless found
        found[1]
      end

      # Number of result pages: the listing length divided by 10, rounded up.
      def total_pages
        (Float(extract_listing_length) / 10).ceil
      end

      # Reads the availabilityLength value embedded in the page's script.
      #
      # @return [String] the digits captured from the page body
      # @raise [Amtrak::TrainFetcher::Error] when the variable is absent
      def extract_listing_length
        found = page.body.to_s.match(/var availabilityLength = '(\d+)';/)
        raise Amtrak::TrainFetcher::Error, 'No availabilityLength found' unless found
        found[1]
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'excon'
2
+
module Amtrak
  class TrainFetcher
    # Service for getting per page train time HTML from the Amtrak website
    class TrainPage
      # Builds a page request from the arguments and returns its body.
      def self.get(*args)
        new(*args).get
      end

      attr_reader :session_id, :page

      # @param session_id [String] value sent as the JSESSIONID cookie
      # @param page [Object] value sent as the _trainstatuspage query parameter
      def initialize(session_id, page)
        @session_id = session_id
        @page = page
      end

      # @return [String] the response body of the page request
      # @raise [Amtrak::TrainFetcher::Error] on an Excon client/server error
      def get
        request.body
      end

      # Performs the GET once and memoizes the response.
      #
      # `expects: [200]` matches MainPage. Excon only raises its HTTP status
      # errors when `expects` is given, so without it the rescue below could
      # not fire for a 4xx/5xx response.
      def request
        @request ||= Excon.get(
          'https://tickets.amtrak.com/itd/amtrak/TrainStatusRequest',
          headers: headers,
          query: query,
          expects: [200]
        )
      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
      end

      def headers
        { 'Cookie' => "JSESSIONID=#{session_id}" }
      end

      def query
        { '_trainstatuspage' => page }
      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ require 'nokogiri'
2
+
module Amtrak
  # Will take in an HTML document as a string and parse out the train schedule
  class TrainParser
    # Error raised when the parser runs into an issue
    class Error < Amtrak::Error; end

    # Parses the given HTML text and returns the list of trains.
    def self.parse(text)
      new(text).parse
    end

    attr_reader :document

    # @param document [String] HTML text, parsed with Nokogiri::HTML.parse
    def initialize(document)
      @document = Nokogiri::HTML.parse(document)
    end

    # Builds one entry per pair of consecutive result rows: the first row of
    # a pair is read as the departure, the second as the arrival.
    #
    # The memoized row list is only read, never consumed, so calling parse
    # again on the same instance yields the same entries.
    #
    # @return [Array<Hash>] hashes with :number, :departure and :arrival
    # @raise [Amtrak::TrainParser::Error] on a Nokogiri syntax error
    def parse
      train_nodes.each_slice(2).map do |departure, arrival|
        {
          number: parse_train_number(departure),
          departure: parse_train(departure),
          arrival: parse_train(arrival)
        }
      end
    rescue Nokogiri::SyntaxError => ex
      raise Amtrak::TrainParser::Error, "#{ex.class} #{ex.message}"
    end

    # All <tr> rows whose class contains 'status_result', as an Array.
    # Fails with 'No trains found' when the document has no such row.
    def train_nodes
      @train_nodes ||= document.search(
        "//tr[contains(@class, 'status_result')]"
      ).tap { |results| fail 'No trains found' unless results.count > 0 }.to_a
    end

    # Integer value of the row's route_num text (0 when it is not numeric).
    def parse_train_number(node)
      find!(
        node, ".//th[@class='service']/div[@class='route_num']/text()"
      ).to_s.to_i
    end

    # Extracts the scheduled and actual/estimated date and time strings of a
    # row. Times wrapped in parentheses are returned without them.
    def parse_train(node) # rubocop:disable Metrics/MethodLength
      scheduled_date = find!(
        node, ".//td[@class='scheduled']/div[@class='date']/text()"
      ).to_s
      estimated_date = find!(
        node, ".//td[@class='act_est']/div[@class='date']/text()"
      ).to_s
      scheduled_time = remove_parentheses(
        find!(node, ".//td[@class='scheduled']/div[@class='time']/text()").to_s
      )
      estimated_time = remove_parentheses(
        find!(node, ".//td[@class='act_est']/div[@class='time']/text()").to_s
      )

      {
        scheduled_date: scheduled_date,
        estimated_date: estimated_date,
        scheduled_time: scheduled_time,
        estimated_time: estimated_time
      }
    end

    # Searches node for xpath and fails when more than one result comes
    # back. An empty result set is returned as-is.
    def find!(node, xpath)
      node.search(xpath).tap { |rs| fail "#{rs.count} results" if rs.count > 1 }
    end

    # Combines a date and a time into a DateTime; nil when either is nil.
    def make_datetime(date, time)
      return if date.nil? || time.nil?

      DateTime.parse("#{date} at #{time}")
    end

    # Returns the text inside the first pair of parentheses, or the string
    # unchanged when it contains none.
    def remove_parentheses(time_string)
      if matches = /\(([^)]+)\)/.match(time_string)
        matches[1]
      else
        time_string
      end
    end
  end
end
@@ -0,0 +1,6 @@
module Amtrak
  # This module holds the Amtrak version
  module Version
    # Frozen so the version string cannot be mutated in place at runtime.
    VERSION = '0.0.1'.freeze
  end
end
@@ -0,0 +1,75 @@
1
+ require 'spec_helper'
2
+
describe Amtrak::TrainFetcher::MainPage do
  describe '#departure_date' do
    subject { described_class.new('', '', date: date) }
    let(:date) { Date.parse('2014-11-12') }

    it 'prints out a formatted date' do
      expect(subject.departure_date).to eq('Wed, Nov 12, 2014')
    end
  end

  describe '#date' do
    subject { described_class.new('', '', date: date) }

    context 'on an instance with a date' do
      let(:date) { Date.parse('2014-11-12') }

      it 'returns the set date' do
        expect(subject.date).to eq(date)
      end
    end

    context 'on an instance without a date' do
      let(:date) { nil }
      let(:stubbed_date) { Date.parse('2014-11-12') }

      it 'returns the current date' do
        expect(Date).to receive(:today).and_return(stubbed_date)
        expect(subject.date).to eq(stubbed_date)
      end
    end
  end

  # The descriptions of the :vcr examples below are deliberately left
  # untouched.
  describe '#total_pages' do
    context 'when only one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }

      it 'returns 1', :vcr do
        expect(subject.total_pages).to eq(1)
      end
    end

    context 'when more than one page exists on the website' do
      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-27')) }

      it 'returns 2', :vcr do
        expect(subject.total_pages).to eq(2)
      end
    end
  end

  describe '#session_id' do
    subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }
    it 'pulls the session id from the cookies', :vcr do
      expect(subject.session_id).to eq('0000kOxPFtO4mDoSiIGk2yzvAz6:187j4dq9a')
    end
  end

  describe '#page' do
    context 'when Excon raises' do
      subject { described_class.new('', '') }

      # Each example has its own description so a failure report identifies
      # which Excon error class was not wrapped.
      it 'reraises a client error as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ClientError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a server error as a TrainFetcher::Error' do
        expect(Excon).to receive(:post) { fail Excon::Errors::ServerError, '' }
        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
describe Amtrak::TrainFetcher::TrainPage do
  subject { described_class.get(session_id, page) }

  describe '#get', :vcr do
    # Both contexts use the same request arguments, so they are defined once.
    let(:session_id) { '0000ITjAiGUWm-mBBpAyeGZ43Rv:187j4ddrq' }
    let(:page) { '1' }

    context 'when it works' do
      it 'includes various classes' do
        expect(subject).to include('route_num')
        expect(subject).to include('scheduled')
        expect(subject).to include('act_est')
      end
    end

    context 'when Excon raises an error' do
      # Each example has its own description so a failure report identifies
      # which Excon error class was not wrapped.
      it 'reraises a client error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ClientError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end

      it 'reraises a server error as a TrainFetcher::Error' do
        expect(Excon).to receive(:get) {
          fail Excon::Errors::ServerError, ''
        }
        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
      end
    end
  end
end