viaggiatreno 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in viaggiatreno.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Michele Bologna
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ # Viaggiatreno
2
+
3
+ A web scraper to fetch real-time information on trains riding the Italian railway system (viaggiatreno/trenitalia).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'viaggiatreno'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install viaggiatreno
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,6 @@
1
+ require "viaggiatreno/version"
2
+ require "viaggiatreno/Train"
3
+
4
+
5
# Top-level namespace of the gem. It is intentionally empty here: the gem
# version constant is added to it by viaggiatreno/version.
module Viaggiatreno; end
@@ -0,0 +1,23 @@
1
# Regular expressions and string constants used to interpret the status text
# scraped from the viaggiatreno pages.
#
# Every value is kept in a class variable and exposed through a class-level
# reader of the same name, e.g. RegExpMatchInfo.REGEXP_DELAY_STR.
class RegExpMatchInfo

  # Status of a travelling train. Group 1 is the status sentence, group 2 the
  # "Ultimo rilevamento a" marker, group 3 the text that follows the marker.
  @@REGEXP_STATE_RUNNING = /(Il treno viaggia.*)(Ultimo rilevamento a)(.*)/
  # Status of a train that has not departed yet.
  @@REGEXP_STATE_NOT_STARTED = /Il treno non e' ancora partito/
  # Status of a train that has arrived.
  @@REGEXP_STATE_FINISHED = /Il treno e' arrivato.*/
  # Delay sentence. Group 1 is the number of minutes, group 2 is exactly
  # "anticipo" or "ritardo". A real alternation is required here: the former
  # character class ([anticipo|ritardo]+) accepted any run of those letters.
  @@REGEXP_DELAY_STR = /con (\d+) minuti di (anticipo|ritardo)/
  # Status sentence of a train that is on time.
  @@REGEXP_NODELAY_STR = /Il treno .* in orario.*/
  # Matches the CSS class of a stop that has already been made.
  @@REGEXP_STOP_ALREADY_DONE = /giaeffettuate/
  # Word that marks the captured minutes as a delay (as opposed to an advance).
  @@STR_DELAY_STR = "ritardo"
  # Placeholder inside the URL templates that is replaced by the train number.
  @@STR_TRAIN_NUMBER_URL_REPLACE = "TRAINNUMBER"

  # readers for the class variables
  def self.REGEXP_STATE_RUNNING() @@REGEXP_STATE_RUNNING end
  def self.REGEXP_STATE_NOT_STARTED() @@REGEXP_STATE_NOT_STARTED end
  def self.REGEXP_STATE_FINISHED() @@REGEXP_STATE_FINISHED end
  def self.STR_DELAY_STR() @@STR_DELAY_STR end
  def self.REGEXP_DELAY_STR() @@REGEXP_DELAY_STR end
  def self.REGEXP_NODELAY_STR() @@REGEXP_NODELAY_STR end
  def self.REGEXP_STOP_ALREADY_DONE() @@REGEXP_STOP_ALREADY_DONE end
  def self.STR_TRAIN_NUMBER_URL_REPLACE() @@STR_TRAIN_NUMBER_URL_REPLACE end
end
@@ -0,0 +1,87 @@
1
+ require 'open-uri'
2
+ require 'nokogiri'
3
+ require_relative 'TrainStop'
4
+ require_relative 'TrainState'
5
+ require_relative 'StringUtils'
6
+ require_relative 'RegExpMatchInfo'
7
+ require_relative 'StopState'
8
+ require_relative 'XPathMatchInfo'
9
+ require_relative 'ViaggiatrenoURLs'
10
+
11
# Downloads the viaggiatreno pages of one train and copies the parsed values
# onto the train object it was created with.
class Scraper

  # trainNumber:: train number as a String; it replaces the placeholder in
  #               both URL templates
  # train::       object that receives the parsed values through its setters
  #               and through addStop
  def initialize(trainNumber, train)
    placeholder = RegExpMatchInfo.STR_TRAIN_NUMBER_URL_REPLACE
    @site_info_main = ViaggiatrenoURLs.SITE_INFO_MAIN.gsub(placeholder, trainNumber)
    @site_info_details = ViaggiatrenoURLs.SITE_INFO_DETAILS.gsub(placeholder, trainNumber)
    @train = train
  end

  # Fetch and parse basic train information (status, trainName, state, delay,
  # lastUpdate) and store it on the train.
  #
  # The state and delay are only touched when the status text matches one of
  # the known sentences; an unrecognised or missing status leaves them alone.
  def updateTrain()
    doc = fetchDocument(@site_info_main)
    doc.xpath(XPathMatchInfo.XPATH_STATUS).each do |x|
      @status = StringUtils.remove_newlines_tabs_and_spaces(x)
    end
    doc.xpath(XPathMatchInfo.XPATH_TRAIN_NAME).each do |x|
      @trainName = x.content
    end

    if @status =~ RegExpMatchInfo.REGEXP_STATE_NOT_STARTED
      @train.state = TrainState.NOT_STARTED
    elsif @status =~ RegExpMatchInfo.REGEXP_STATE_RUNNING ||
          @status =~ RegExpMatchInfo.REGEXP_STATE_FINISHED
      # Both operands must be matched against the status: a bare Regexp
      # object is always truthy and would make this branch unconditional.
      @train.delay = parseDelay(@status)
      running = @status.match(RegExpMatchInfo.REGEXP_STATE_RUNNING)
      if running
        @train.state = TrainState.RUNNING
        @train.lastUpdate = running[3].strip
        # keep only the status sentence, dropping the last-update part
        @status = running[1].rstrip
      else
        @train.state = TrainState.FINISHED
      end
    end

    @train.status = @status
    @train.trainName = @trainName
  end

  # Fetch and parse train details (departing and arriving station,
  # intermediate stops) and add one TrainStop per matched element to the train.
  def updateTrainDetails()
    doc = fetchDocument(@site_info_details)
    doc.xpath(XPathMatchInfo.XPATH_DETAILS_GENERIC).each do |x|
      # NOTE(review): these values live in instance variables, so an element
      # lacking one of the fields reuses the value of the previous element.
      # Confirm whether that carry-over is intended.
      x.xpath(XPathMatchInfo.XPATH_DETAILS_STATION_NAME).each do |stationName|
        @stationName = stationName.to_s
      end
      x.xpath(XPathMatchInfo.XPATH_DETAILS_SCHEDULED_STOP_TIME).each do |scheduledArrivalTime|
        @scheduledArrivalTime =
          StringUtils.remove_newlines_tabs_and_spaces(scheduledArrivalTime).to_s
      end
      x.xpath(XPathMatchInfo.XPATH_DETAILS_ACTUAL_STOP_TIME).each do |actualArrivalTime|
        @actualArrivalTime =
          StringUtils.remove_newlines_tabs_and_spaces(actualArrivalTime).to_s
      end

      alreadyDone = x.attributes()['class'].to_s =~ RegExpMatchInfo.REGEXP_STOP_ALREADY_DONE
      stopState = alreadyDone ? StopState.DONE : StopState.TODO
      @train.addStop(TrainStop.new(@stationName, @scheduledArrivalTime,
                                   @actualArrivalTime, stopState))
    end
  end

  private

  # Downloads url and returns it parsed by Nokogiri. URI#open (provided by
  # open-uri) is used instead of Kernel#open, which no longer opens URLs on
  # Ruby 3; the block form closes the downloaded handle once parsing is done.
  def fetchDocument(url)
    URI.parse(url).open { |page| Nokogiri::HTML(page) }
  end

  # Returns the delay in minutes described by status: 0 when the train is on
  # time, a positive number for a delay, a negative number when the train is
  # ahead of time, or nil when the status carries no recognisable delay text.
  def parseDelay(status)
    return 0 if status =~ RegExpMatchInfo.REGEXP_NODELAY_STR
    delayMatch = status.match(RegExpMatchInfo.REGEXP_DELAY_STR)
    return nil unless delayMatch
    minutes = delayMatch[1].to_i
    delayMatch[2] == RegExpMatchInfo.STR_DELAY_STR ? minutes : -minutes
  end
end
87
+
@@ -0,0 +1,8 @@
1
# Possible states of a single stop on a train's route.
class StopState
  # the stop has already been made
  @@DONE = "DONE"
  # the stop is still to be made
  @@TODO = "TODO"

  # Returns the marker for a stop that has already been made.
  def self.DONE
    @@DONE
  end

  # Returns the marker for a stop that is still to be made.
  def self.TODO
    @@TODO
  end
end
@@ -0,0 +1,7 @@
1
# Text clean-up helpers for scraped page content.
class StringUtils
  # Returns the text of +str+ (anything responding to #content) on a single
  # line: carriage returns and newlines are dropped, tabs become spaces, runs
  # of spaces collapse to one, and surrounding whitespace is stripped.
  def self.remove_newlines_tabs_and_spaces(str)
    text = str.content
    without_breaks = text.delete("\r\n")
    with_spaces = without_breaks.tr("\t", " ")
    with_spaces.squeeze(" ").strip
  end
end
@@ -0,0 +1,92 @@
1
+ require_relative 'Scraper.rb'
2
+ require_relative 'TrainState.rb'
3
+ require_relative 'TrainStop.rb'
4
+
5
# A train identified by its number, filled with data scraped from
# viaggiatreno.
#
# Basic information (name, status, state, delay, last update) is fetched when
# the object is created; the list of stops is fetched the first time it is
# requested.
class Train
  attr_accessor :trainNumber, :trainName, :delay, :status, :lastUpdate, :state

  # Creates the train and immediately fetches its basic information.
  # trainNumber:: number of the train; converted with to_s for the scraper
  def initialize(trainNumber)
    @trainNumber = trainNumber
    @scraper = Scraper.new(trainNumber.to_s, self)
    update
  end

  # Re-fetches the basic information of the train.
  def update()
    @scraper.updateTrain()
  end

  # Discards the known stops and fetches them again.
  def updateDetails()
    @trainStops = []
    @scraper.updateTrainDetails()
  end

  def to_s
    "#{@trainNumber} #{@trainName}: #{@status} state: #{@state}, " \
      "delay: #{@delay}, lastUpdate: #{@lastUpdate} #{@trainStops}"
  end

  # Appends a stop to the list; called by the scraper while it parses the
  # details page (updateDetails prepares the list beforehand).
  def addStop(trainStop)
    @trainStops << trainStop
  end

  # Returns the stops of the train, fetching them on first use.
  def trainStops()
    updateDetails if @trainStops.nil?
    @trainStops
  end

  # The accessors below return a String, or nil when the train has no stops.

  def departingStation()
    stopValue(trainStops.first, :trainStation)
  end

  def arrivingStation()
    stopValue(trainStops.last, :trainStation)
  end

  def scheduledDepartingTime()
    stopValue(trainStops.first, :scheduledStopTime)
  end

  def scheduledArrivingTime()
    stopValue(trainStops.last, :scheduledStopTime)
  end

  def actualDepartingTime()
    stopValue(trainStops.first, :actualStopTime)
  end

  def actualArrivingTime()
    stopValue(trainStops.last, :actualStopTime)
  end

  # Returns the scheduled time at the first stop named stationName, or nil
  # when the train does not stop there (previously the whole stop list leaked
  # out as the return value in that case).
  def scheduledStopTime(stationName)
    stopValue(findStop(stationName), :scheduledStopTime)
  end

  # Returns the actual time at the first stop named stationName, or nil when
  # the train does not stop there.
  def actualStopTime(stationName)
    stopValue(findStop(stationName), :actualStopTime)
  end

  # Returns the string form of the first stop when that stop is not done yet,
  # otherwise of the first done stop whose successor is not done, otherwise of
  # the final stop. Returns nil when the train has no stops.
  def lastStop()
    stops = trainStops
    return nil if stops.empty?
    return stops.first.to_s if stops.first.status.to_s != StopState.DONE
    stops.each_cons(2) do |current, following|
      if current.status.to_s == StopState.DONE &&
         following.status.to_s != StopState.DONE
        return current.to_s
      end
    end
    stops.last.to_s
  end

  private

  # First stop whose station name equals stationName, or nil.
  def findStop(stationName)
    trainStops.find { |trainStop| trainStop.trainStation.to_s == stationName }
  end

  # String form of the given attribute of stop, or nil when stop is nil.
  def stopValue(stop, attribute)
    stop && stop.send(attribute).to_s
  end
end
92
+
@@ -0,0 +1,11 @@
1
# Possible overall states of a train.
class TrainState
  # the train is travelling
  @@RUNNING = "TRAVELING"
  # the train has arrived
  @@FINISHED = "ARRIVED"
  # the train has not departed yet
  @@NOT_STARTED = "NOT DEPARTED"

  # Returns the marker for a travelling train.
  def self.RUNNING
    @@RUNNING
  end

  # Returns the marker for a train that has arrived.
  def self.FINISHED
    @@FINISHED
  end

  # Returns the marker for a train that has not departed yet.
  def self.NOT_STARTED
    @@NOT_STARTED
  end
end
11
+
@@ -0,0 +1,24 @@
1
+ require_relative 'StopState'
2
+
3
# One stop of a train: the station together with its scheduled and actual
# stop times and a status compared against the StopState markers.
class TrainStop
  attr_accessor :trainStation, :scheduledStopTime, :actualStopTime, :status

  def initialize(trainStation, scheduledStopTime, actualStopTime, status)
    @trainStation, @scheduledStopTime = trainStation, scheduledStopTime
    @actualStopTime, @status = actualStopTime, status
  end

  # Renders the stop on one line, prefixed with "[X] " when it is done,
  # "[ ] " when it is still to do, and nothing for any other status.
  def to_s
    marker =
      if @status == StopState.DONE
        "[X] "
      elsif @status == StopState.TODO
        "[ ] "
      else
        ""
      end
    "#{marker}#{trainStation} = SCHEDULED: #{scheduledStopTime} EXPECTED: #{actualStopTime}"
  end
end
24
+
@@ -0,0 +1,17 @@
1
+ require_relative 'RegExpMatchInfo'
2
+
3
# URL templates of the viaggiatreno mobile pages. Each template embeds the
# RegExpMatchInfo.STR_TRAIN_NUMBER_URL_REPLACE placeholder where the train
# number goes.
class ViaggiatrenoURLs
  # page with the basic information of a train
  @@SITE_INFO_MAIN = [
    "http://mobile.viaggiatreno.it/viaggiatreno/mobile/numero?numeroTreno=",
    RegExpMatchInfo.STR_TRAIN_NUMBER_URL_REPLACE,
    "&tipoRicerca=numero&lang=IT"
  ].join

  # page with the details (stops) of a train
  @@SITE_INFO_DETAILS = [
    "http://mobile.viaggiatreno.it/viaggiatreno/mobile/scheda?dettaglio=visualizza&numeroTreno=",
    RegExpMatchInfo.STR_TRAIN_NUMBER_URL_REPLACE,
    "&tipoRicerca=numero&lang=IT"
  ].join

  # Returns the URL template of the basic information page.
  def self.SITE_INFO_MAIN
    @@SITE_INFO_MAIN
  end

  # Returns the URL template of the details page.
  def self.SITE_INFO_DETAILS
    @@SITE_INFO_DETAILS
  end
end
17
+
@@ -0,0 +1,19 @@
1
# XPath expressions used to pull train information out of the scraped pages.
class XPathMatchInfo
  # status text of the train
  @@XPATH_STATUS = '//div[@class="evidenziato"]/strong'
  # name of the train
  @@XPATH_TRAIN_NAME = '//h1/text()'
  # one element per stop
  @@XPATH_DETAILS_GENERIC =
    '//div[@class="giaeffettuate"] | //div[@class="corpocentrale"]'
  # the following expressions are evaluated relative to a stop element
  @@XPATH_DETAILS_STATION_NAME = 'h2/text()'
  @@XPATH_DETAILS_SCHEDULED_STOP_TIME = 'p[1]/strong[1]/text()'
  @@XPATH_DETAILS_ACTUAL_STOP_TIME = 'p[2]/strong[1]/text()'

  def self.XPATH_STATUS
    @@XPATH_STATUS
  end

  def self.XPATH_TRAIN_NAME
    @@XPATH_TRAIN_NAME
  end

  def self.XPATH_DETAILS_GENERIC
    @@XPATH_DETAILS_GENERIC
  end

  def self.XPATH_DETAILS_STATION_NAME
    @@XPATH_DETAILS_STATION_NAME
  end

  def self.XPATH_DETAILS_SCHEDULED_STOP_TIME
    @@XPATH_DETAILS_SCHEDULED_STOP_TIME
  end

  def self.XPATH_DETAILS_ACTUAL_STOP_TIME
    @@XPATH_DETAILS_ACTUAL_STOP_TIME
  end
end
19
+
@@ -0,0 +1,4 @@
1
module Viaggiatreno
  # Version of the gem; the gemspec reads it as the package version.
  VERSION = '1.0.1'
end
4
+
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'viaggiatreno/version'
5
+
6
# Package definition of the viaggiatreno gem.
Gem::Specification.new do |gem|
  gem.name          = "viaggiatreno"
  gem.version       = Viaggiatreno::VERSION
  gem.authors       = ["Michele Bologna"]
  gem.email         = ["michele.bologna@gmail.com"]
  gem.description   = %q{A web scraper to fetch real time information on train riding the Italian railway system (viaggiatreno/trenitalia)}
  gem.summary       = %q{A scraper for real time information on Italian railway system (viaggiatreno)}
  gem.homepage      = ""
  # The package ships LICENSE.txt with the MIT License; declare it so the
  # published metadata no longer reports an empty license list.
  gem.licenses      = ["MIT"]

  # Package every file tracked by git; executables and test files are derived
  # from that list by their path prefix.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_dependency "nokogiri"
end
metadata ADDED
@@ -0,0 +1,79 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: viaggiatreno
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Michele Bologna
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-05-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: A web scraper to fetch real time information on train riding the Italian
31
+ railway system (viaggiatreno/trenitalia)
32
+ email:
33
+ - michele.bologna@gmail.com
34
+ executables: []
35
+ extensions: []
36
+ extra_rdoc_files: []
37
+ files:
38
+ - .gitignore
39
+ - Gemfile
40
+ - LICENSE.txt
41
+ - README.md
42
+ - Rakefile
43
+ - lib/viaggiatreno.rb
44
+ - lib/viaggiatreno/RegExpMatchInfo.rb
45
+ - lib/viaggiatreno/Scraper.rb
46
+ - lib/viaggiatreno/StopState.rb
47
+ - lib/viaggiatreno/StringUtils.rb
48
+ - lib/viaggiatreno/Train.rb
49
+ - lib/viaggiatreno/TrainState.rb
50
+ - lib/viaggiatreno/TrainStop.rb
51
+ - lib/viaggiatreno/ViaggiatrenoURLs.rb
52
+ - lib/viaggiatreno/XPathMatchInfo.rb
53
+ - lib/viaggiatreno/version.rb
54
+ - viaggiatreno.gemspec
55
+ homepage: ''
56
+ licenses: []
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project:
75
+ rubygems_version: 1.8.23
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: A scraper for real time information on Italian railway system (viaggiatreno)
79
+ test_files: []