RubyGems - amtrak - Versions diffs - 0.0.1 - Mend

amtrak 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +7 -0
data/.gitignore +2 -0
data/.rubocop.yml +6 -0
data/.travis.yml +5 -0
data/Gemfile +3 -0
data/README.markdown +3 -0
data/Rakefile +23 -0
data/amtrak.gemspec +29 -0
data/lib/amtrak.rb +14 -0
data/lib/amtrak/train_fetcher.rb +39 -0
data/lib/amtrak/train_fetcher/main_page.rb +70 -0
data/lib/amtrak/train_fetcher/train_page.rb +41 -0
data/lib/amtrak/train_parser.rb +90 -0
data/lib/amtrak/version.rb +6 -0
data/spec/amtrak/train_fetcher/main_page_spec.rb +75 -0
data/spec/amtrak/train_fetcher/train_page_spec.rb +37 -0
data/spec/amtrak/train_fetcher_spec.rb +59 -0
data/spec/amtrak/train_parser_spec.rb +230 -0
data/spec/amtrak_spec.rb +243 -0
data/spec/fixtures/html/pvd_to_bby.html +2401 -0
data/spec/fixtures/vcr/Amtrak/_get/returns_a_list_of_train_times.yml +1469 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/does_the_same_as_get.yml +1467 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_invalid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +843 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_a_valid_date_and_train_stations/includes_various_classes.yml +1467 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher/_get/given_an_invalid_date_and_valid_train_stations/does_not_include_various_classes_and_includes_an_error.yml +838 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_session_id/pulls_the_session_id_from_the_cookies.yml +1082 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_1.yml +1076 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_more_than_one_page_exists_on_the_website/returns_2.yml +1074 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher_MainPage/_total_pages/when_only_one_page_exists_on_the_website/returns_1.yml +1082 -0
data/spec/fixtures/vcr/Amtrak_TrainFetcher_TrainPage/_get/when_it_works/includes_various_classes.yml +247 -0
data/spec/spec_helper.rb +18 -0
data/spec/support/vcr.rb +10 -0
metadata +206 -0

checksums.yaml ADDED

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 893ffef1c08c1763b3a34c0f70d9dd303f89385f
+  data.tar.gz: c2e940a65b8bf344359b2ca25a15b0c853776c3a
+SHA512:
+  metadata.gz: f98144098bbe5944ababe3817acc7713a838279dc63b1a9cacf5af17a34584faaf879c8ebce826a462867ddab44bce7fb5db5e1fca9ad4acce5ad05a60edf442
+  data.tar.gz: 42ce40b14aeb3341e0d46e20512b2275fb80e02d51cdf03ff475314974084db1190244c340f79cf6c175add675cc2d261e0d3a28bf1cd6249008c109cc10dd84

data/.gitignore ADDED

@@ -0,0 +1,2 @@
+Gemfile.lock
+coverage

data/.rubocop.yml ADDED

@@ -0,0 +1,6 @@
+Lint/AssignmentInCondition:
+  Enabled: false
+Metrics/MethodLength:
+  Max: 12
+Metrics/LineLength:
+  Max: 100

data/.travis.yml ADDED

@@ -0,0 +1,5 @@
+language: ruby
+rvm:
+    - 2.0.0
+    - 2.1.1
+script: bundle exec rake

data/Gemfile ADDED

@@ -0,0 +1,3 @@
+# Resolve gems from the public RubyGems index.
+source 'https://rubygems.org'
+# Take the dependency list from the gemspec in this directory.
+gemspec

data/README.markdown ADDED

@@ -0,0 +1,3 @@
+# Amtrak
+Pulls down the train times from the Amtrak website

data/Rakefile ADDED

@@ -0,0 +1,23 @@
+require 'rake'
+# Tasks run by a bare `rake`; each one is registered only if its library loads.
+default_tasks = []
+begin
+  require 'rubocop/rake_task'
+  RuboCop::RakeTask.new(:rubocop) { |lint| lint.patterns = %w(lib/**/*.rb spec/**/*.rb) }
+  default_tasks.push(:rubocop)
+rescue LoadError
+  # RuboCop is not available: leave the lint task out.
+end
+begin
+  require 'rspec/core/rake_task'
+  RSpec::Core::RakeTask.new(:spec)
+  default_tasks.push(:spec)
+rescue LoadError
+  # RSpec is not available: leave the spec task out.
+end
+task(default: default_tasks)

data/amtrak.gemspec ADDED

@@ -0,0 +1,29 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'amtrak/version'
+# Packaging metadata for the amtrak gem.
+Gem::Specification.new do |spec|
+  spec.name          = 'amtrak'
+  spec.version       = Amtrak::Version::VERSION
+  spec.authors       = ['Todd Lunter']
+  spec.email         = ['tlunter@gmail.com']
+  spec.description   = %q{Scrape train times from Amtrak.com}
+  spec.summary       = %q{Scrape train times from Amtrak.com}
+  spec.homepage      = 'https://github.com/tlunter/amtrak_gem'
+  spec.license       = 'MIT'
+  # Package exactly the files tracked by git.
+  spec.files         = `git ls-files`.split($/)
+  # Executables are the basenames of tracked files under bin/. The pattern
+  # must be a Regexp: grep with a String only matches an identical element.
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ['lib']
+  # Runtime dependencies: HTTP client and HTML parser.
+  spec.add_dependency 'excon', '~> 0.41.0'
+  spec.add_dependency 'nokogiri', '~> 1.6.4'
+  # Development-only tooling.
+  spec.add_development_dependency 'rake'
+  spec.add_development_dependency 'pry'
+  spec.add_development_dependency 'rspec', '~> 3.0.0'
+  spec.add_development_dependency 'rubocop', '~> 0.25.0'
+  spec.add_development_dependency 'simplecov', '~> 0.8.2'
+  spec.add_development_dependency 'vcr', '~> 2.9.2'
+end

data/lib/amtrak.rb ADDED

@@ -0,0 +1,14 @@
+# Top-level namespace and public entry point of the gem
+module Amtrak
+  # Root of the gem's error hierarchy
+  class Error < StandardError; end
+  # Fetches every result page for a trip and parses each one.
+  #
+  # from, to - station identifiers, handed to Amtrak::TrainFetcher.get
+  # date:    - optional travel date, handed through unchanged (nil by default)
+  #
+  # Returns a single flattened Array holding the parse results of all pages.
+  def self.get(from, to, date: nil)
+    pages = Amtrak::TrainFetcher.get(from, to, date: date)
+    pages.map { |page_html| Amtrak::TrainParser.parse(page_html) }.flatten
+  end
+end
+require 'amtrak/train_parser'
+require 'amtrak/train_fetcher'
+require 'amtrak/version'

data/lib/amtrak/train_fetcher.rb ADDED

@@ -0,0 +1,39 @@
+module Amtrak
+  # Service for getting train time HTML pages from the Amtrak website
+  class TrainFetcher
+    # Raised by the page classes when an HTTP request fails
+    class Error < Amtrak::Error; end
+    # Convenience wrapper: builds a fetcher and returns its pages.
+    # The arguments are spelled out rather than splatted: forwarding `date:`
+    # through *args passes it on as a positional Hash, which Ruby 3.0+ no
+    # longer converts back into keyword arguments (ArgumentError).
+    def self.get(from, to, date: nil)
+      new(from, to, date: date).get
+    end
+    attr_reader :from, :to, :date
+    # from, to - station identifiers, handed to MainPage
+    # date:    - optional travel date, handed to MainPage unchanged
+    def initialize(from, to, date: nil)
+      @from = from
+      @to = to
+      @date = date
+    end
+    # Returns an Array with the body of every result page, in page order.
+    def get
+      (1..total_pages).map do |page|
+        Amtrak::TrainFetcher::TrainPage.get(session_id, page)
+      end
+    end
+    # Memoized MainPage for this search; source of session id and page count.
+    def first_page
+      @first_page ||= Amtrak::TrainFetcher::MainPage.new(from, to, date: date)
+    end
+    # Memoized session id reported by the first page.
+    def session_id
+      @session_id ||= first_page.session_id
+    end
+    # Memoized number of result pages reported by the first page.
+    def total_pages
+      @total_pages ||= first_page.total_pages
+    end
+  end
+end
+require 'amtrak/train_fetcher/main_page'
+require 'amtrak/train_fetcher/train_page'

data/lib/amtrak/train_fetcher/main_page.rb ADDED

@@ -0,0 +1,70 @@
+require 'excon'
+require 'date'
+module Amtrak
+  class TrainFetcher
+    # Posts the train status search form and exposes what is needed to
+    # request the result pages: the session cookie and the page count
+    class MainPage
+      attr_reader :from, :to
+      # from, to - values sent as the origin and destination form fields
+      # date:    - optional travel date; #date falls back to Date.today
+      def initialize(from, to, date: nil)
+        @from = from
+        @to = to
+        @date = date
+      end
+      # Memoized response of the search POST; only a 200 status is accepted.
+      # Raises Amtrak::TrainFetcher::Error on an Excon client/server error.
+      # NOTE(review): this endpoint is plain http while TrainPage uses https;
+      # confirm whether that is intentional before changing the URL.
+      def page
+        @page ||= Excon.post(
+          'http://tickets.amtrak.com/itd/amtrak',
+          headers: headers,
+          body: URI.encode_www_form(body),
+          expects: [200]
+        )
+      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
+        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
+      end
+      # Request headers: the body is sent form-encoded.
+      def headers
+        { 'Content-Type' => 'application/x-www-form-urlencoded' }
+      end
+      # Form fields of the search request; only the departure date, origin
+      # and destination vary per instance.
+      # rubocop:disable all
+      def body
+        {
+          "_handler=amtrak.presentation.handler.request.rail.AmtrakRailTrainStatusSearchRequestHandler/_xpath=/sessionWorkflow/productWorkflow[@product='Rail']" => '',
+          "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate.date" => departure_date,
+          'requestor'        => 'amtrak.presentation.handler.page.rail.AmtrakRailGetTrainStatusPageHandler',
+          'xwdf_trainNumber' => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/segmentRequirements[1]/serviceCode",
+          'wdf_trainNumber'  => 'optional',
+          'xwdf_SortBy'      => "/sessionWorkflow/productWorkflow[@product='Rail']/tripRequirements/journeyRequirements[1]/departDate/@radioSelect",
+          'wdf_SortBy'       => 'arrivalTime',
+          'xwdf_origin'      => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/departLocation/search",
+          'wdf_origin'       => from.to_s,
+          'xwdf_destination' => "/sessionWorkflow/productWorkflow[@product='Rail']/travelSelection/journeySelection[1]/arriveLocation/search",
+          'wdf_destination'  => to.to_s,
+        }
+      end
+      # rubocop:enable all
+      # Travel date formatted for the form, e.g. 'Wed, Nov 12, 2014'.
+      def departure_date
+        date.strftime('%a, %b %-d, %Y')
+      end
+      # The date given at construction, or today when none was given.
+      def date
+        @date ||= Date.today
+      end
+      # Session id taken from the JSESSIONID cookie of the search response.
+      # Raises Amtrak::TrainFetcher::Error when the cookie is missing, instead
+      # of failing with NoMethodError on a nil header or nil match.
+      def session_id
+        cookie = page.headers['Set-Cookie'].to_s
+        found = cookie.match(/JSESSIONID=([^;]*)/)
+        fail Amtrak::TrainFetcher::Error, 'JSESSIONID cookie missing from response' unless found
+        found[1]
+      end
+      # Number of result pages: the listing count divided by 10, rounded up
+      # (presumably 10 listings per page; verify against the site).
+      def total_pages
+        (Float(extract_listing_length) / 10).ceil
+      end
+      # Digits of the `availabilityLength` JavaScript variable in the page.
+      # Raises Amtrak::TrainFetcher::Error when the variable is not present.
+      def extract_listing_length
+        found = page.body.match(/var availabilityLength = '(\d+)';/)
+        fail Amtrak::TrainFetcher::Error, 'availabilityLength missing from response' unless found
+        found[1]
+      end
+    end
+  end
+end

data/lib/amtrak/train_fetcher/train_page.rb ADDED

@@ -0,0 +1,41 @@
+require 'excon'
+module Amtrak
+  class TrainFetcher
+    # Service for getting per page train time HTML from the Amtrak website
+    class TrainPage
+      # Convenience wrapper: builds a TrainPage and returns its HTML body.
+      def self.get(*args)
+        new(*args).get
+      end
+      attr_reader :session_id, :page
+      # session_id - value sent back as the JSESSIONID cookie
+      # page       - value of the `_trainstatuspage` query parameter
+      def initialize(session_id, page)
+        @session_id = session_id
+        @page = page
+      end
+      # Returns the response body of the page request.
+      def get
+        request.body
+      end
+      # Memoized response of the page GET.
+      # `expects: [200]` makes Excon raise on any other status; without it
+      # the client/server errors rescued below are never raised for a bad
+      # status. This mirrors MainPage#page.
+      # Raises Amtrak::TrainFetcher::Error on an Excon client/server error.
+      def request
+        @request ||= Excon.get(
+          'https://tickets.amtrak.com/itd/amtrak/TrainStatusRequest',
+          headers: headers,
+          query: query,
+          expects: [200]
+        )
+      rescue Excon::Errors::ClientError, Excon::Errors::ServerError => ex
+        raise Amtrak::TrainFetcher::Error, "#{ex.class} #{ex.message}"
+      end
+      # Request headers: carries the session cookie.
+      def headers
+        { 'Cookie' => "JSESSIONID=#{session_id}" }
+      end
+      # Query string parameters: selects the result page.
+      def query
+        { '_trainstatuspage' => page }
+      end
+    end
+  end
+end

data/lib/amtrak/train_parser.rb ADDED

@@ -0,0 +1,90 @@
+require 'date'
+require 'nokogiri'
+module Amtrak
+  # Will take in an HTML document as a string and parse out the train schedule
+  class TrainParser
+    # Error raised when the parser runs into an issue
+    class Error < Amtrak::Error; end
+    # Convenience wrapper: parses the given HTML text in one call.
+    def self.parse(text)
+      new(text).parse
+    end
+    attr_reader :document
+    # document - HTML text; stored as a parsed Nokogiri document
+    def initialize(document)
+      @document = Nokogiri::HTML.parse(document)
+    end
+    # Returns an Array with one Hash per train: :number, :departure and
+    # :arrival (see #parse_train). Result rows are taken as consecutive
+    # departure/arrival pairs. The rows are paired with each_slice so the
+    # memoized row list is not consumed and #parse can be called again.
+    # Raises Amtrak::TrainParser::Error on a Nokogiri syntax error.
+    def parse
+      train_nodes.each_slice(2).map do |departure, arrival|
+        {
+          number: parse_train_number(departure),
+          departure: parse_train(departure),
+          arrival: parse_train(arrival)
+        }
+      end
+    rescue Nokogiri::SyntaxError => ex
+      raise Amtrak::TrainParser::Error, "#{ex.class} #{ex.message}"
+    end
+    # Memoized Array of the <tr> rows whose class contains 'status_result'.
+    # Fails with 'No trains found' (a RuntimeError) when there are none.
+    def train_nodes
+      @train_nodes ||= document.search(
+        "//tr[contains(@class, 'status_result')]"
+      ).tap { |results| fail 'No trains found' unless results.count > 0 }.to_a
+    end
+    # Train number read from the row's route_num cell, as an Integer.
+    def parse_train_number(node)
+      find!(
+        node, ".//th[@class='service']/div[@class='route_num']/text()"
+      ).to_s.to_i
+    end
+    # Returns a Hash of Strings with the scheduled and estimated date and
+    # time of one row; times have surrounding parentheses removed.
+    def parse_train(node) # rubocop:disable Metrics/MethodLength
+      scheduled_date = find!(
+        node, ".//td[@class='scheduled']/div[@class='date']/text()"
+      ).to_s
+      estimated_date = find!(
+        node, ".//td[@class='act_est']/div[@class='date']/text()"
+      ).to_s
+      scheduled_time = remove_parentheses(
+        find!(node, ".//td[@class='scheduled']/div[@class='time']/text()").to_s
+      )
+      estimated_time = remove_parentheses(
+        find!(node, ".//td[@class='act_est']/div[@class='time']/text()").to_s
+      )
+      {
+        scheduled_date: scheduled_date,
+        estimated_date: estimated_date,
+        scheduled_time: scheduled_time,
+        estimated_time: estimated_time
+      }
+    end
+    # Searches node with xpath and returns the result set; fails (with a
+    # RuntimeError) when more than one node matches. Zero matches are allowed.
+    def find!(node, xpath)
+      node.search(xpath).tap { |rs| fail "#{rs.count} results" if rs.count > 1 }
+    end
+    # Returns nil when either part is nil, otherwise the DateTime parsed
+    # from "<date> at <time>".
+    def make_datetime(date, time)
+      return if date.nil? || time.nil?
+      DateTime.parse("#{date} at #{time}")
+    end
+    # Returns the text inside the first pair of parentheses, or the string
+    # unchanged when it contains none.
+    def remove_parentheses(time_string)
+      if matches = /\(([^)]+)\)/.match(time_string)
+        matches[1]
+      else
+        time_string
+      end
+    end
+  end
+end

data/lib/amtrak/version.rb ADDED

@@ -0,0 +1,6 @@
+module Amtrak
+  # This module holds the Amtrak version
+  module Version
+    # Version string of the gem; amtrak.gemspec reads it for spec.version.
+    VERSION = '0.0.1'
+  end
+end

data/spec/amtrak/train_fetcher/main_page_spec.rb ADDED

@@ -0,0 +1,75 @@
+require 'spec_helper'
+# The context and example names of the :vcr examples below are left as they
+# are: the cassette files under spec/fixtures/vcr are named after them.
+describe Amtrak::TrainFetcher::MainPage do
+  describe '#departure_date' do
+    subject { described_class.new('', '', date: date) }
+    let(:date) { Date.parse('2014-11-12') }
+    it 'prints out a formatted date' do
+      expect(subject.departure_date).to eq('Wed, Nov 12, 2014')
+    end
+  end
+  describe '#date' do
+    subject { described_class.new('', '', date: date) }
+    context 'on an instance with a date' do
+      let(:date) { Date.parse('2014-11-12') }
+      it 'returns the set date' do
+        expect(subject.date).to eq(date)
+      end
+    end
+    context 'on an instance without a date' do
+      let(:date) { nil }
+      let(:stubbed_date) { Date.parse('2014-11-12') }
+      it 'returns the current date' do
+        expect(Date).to receive(:today).and_return(stubbed_date)
+        expect(subject.date).to eq(stubbed_date)
+      end
+    end
+  end
+  describe '#total_pages' do
+    context 'when only one page exists on the website' do
+      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }
+      it 'returns 1', :vcr do
+        expect(subject.total_pages).to eq(1)
+      end
+    end
+    context 'when more than one page exists on the website' do
+      subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-27')) }
+      it 'returns 2', :vcr do
+        expect(subject.total_pages).to eq(2)
+      end
+    end
+  end
+  describe '#session_id' do
+    subject { described_class.new('pvd', 'bby', date: Date.parse('2014-11-28')) }
+    it 'pulls the session id from the cookies', :vcr do
+      expect(subject.session_id).to eq('0000kOxPFtO4mDoSiIGk2yzvAz6:187j4dq9a')
+    end
+  end
+  describe '#page' do
+    context 'when Excon raises' do
+      subject { described_class.new('', '') }
+      # One example per Excon error family, each with its own description so
+      # a failure report says which case broke.
+      it 'raises a TrainFetcher::Error on a client error' do
+        expect(Excon).to receive(:post) { fail Excon::Errors::ClientError, '' }
+        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
+      end
+      it 'raises a TrainFetcher::Error on a server error' do
+        expect(Excon).to receive(:post) { fail Excon::Errors::ServerError, '' }
+        expect { subject.page }.to raise_error(Amtrak::TrainFetcher::Error)
+      end
+    end
+  end
+end

data/spec/amtrak/train_fetcher/train_page_spec.rb ADDED

@@ -0,0 +1,37 @@
+require 'spec_helper'
+describe Amtrak::TrainFetcher::TrainPage do
+  subject { described_class.get(session_id, page) }
+  describe '#get', :vcr do
+    # Shared by every example in this group.
+    let(:session_id) { '0000ITjAiGUWm-mBBpAyeGZ43Rv:187j4ddrq' }
+    let(:page) { '1' }
+    # Context and example names here are kept as they are: the recorded
+    # cassette under spec/fixtures/vcr is named after them.
+    context 'when it works' do
+      it 'includes various classes' do
+        expect(subject).to include('route_num')
+        expect(subject).to include('scheduled')
+        expect(subject).to include('act_est')
+      end
+    end
+    context 'when Excon raises an error' do
+      # One example per Excon error family, each with its own description so
+      # a failure report says which case broke.
+      it 'reraises a client error as a TrainFetcher::Error' do
+        expect(Excon).to receive(:get) {
+          fail Excon::Errors::ClientError, ''
+        }
+        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
+      end
+      it 'reraises a server error as a TrainFetcher::Error' do
+        expect(Excon).to receive(:get) {
+          fail Excon::Errors::ServerError, ''
+        }
+        expect { subject }.to raise_error(Amtrak::TrainFetcher::Error)
+      end
+    end
+  end
+end