southeastern-daily-performance 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +25 -0
- data/Rakefile +128 -0
- data/bin/sedpr-to-csv +12 -0
- data/combined.csv +1210 -0
- data/lib/sedpr.rb +3 -0
- data/lib/sedpr/affected_service.rb +41 -0
- data/lib/sedpr/affected_services_report.rb +28 -0
- data/lib/sedpr/daily_performance_report.rb +55 -0
- metadata +62 -0
data/lib/sedpr.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# One train service named in a disruption report, parsed from a line of text
# such as "07:32 Hastings to Charing Cross cancelled".
#
# Parsing problems are reported with +warn+ rather than raised. When the line
# cannot be parsed at all, every reader except +reason_for_disruption+
# returns nil.
class AffectedService

  # Phrases that mark where the destination station ends and the description
  # of the effect on the service begins.
  EFFECT_PHRASES = [
    'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
  ].freeze

  # "<hh:mm> <start station> <separator> <remainder>", where the separator is
  # "-", "to" or a literal "?".
  # NOTE(review): the "?" presumably stands in for a dash that was lost to an
  # encoding problem in the scraped page - confirm against real reports.
  SERVICE_PATTERN = /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/

  # Readers that must all agree for two services to be considered equal.
  COMPARED_ATTRIBUTES = [
    :reason_for_disruption, :scheduled_start_time, :scheduled_start_station,
    :scheduled_destination_station, :effect_on_service
  ].freeze

  attr_reader :reason_for_disruption
  attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
  attr_reader :effect_on_service

  # reason_for_disruption - stored as given.
  # incident_text         - one line describing the service; expected shape is
  #                         "hh:mm Start to Destination <effect>".
  def initialize(reason_for_disruption, incident_text)
    @reason_for_disruption = reason_for_disruption

    service = SERVICE_PATTERN.match(incident_text)
    unless service
      warn "Warning. Cannot parse service details: '#{incident_text}'"
      return
    end

    @scheduled_start_time = service[1]
    # Keep only letters and spaces in the start station name.
    @scheduled_start_station = service[2].gsub(/[^a-zA-Z ]/, '')
    @scheduled_destination_station, @effect_on_service =
      split_destination_and_effect(service[3], incident_text)
  end

  # True when +incident+ exposes the same five readers with equal values.
  # Objects lacking any of the readers (nil, strings, ...) compare as not
  # equal instead of raising NoMethodError.
  def ==(incident)
    COMPARED_ATTRIBUTES.all? do |attribute|
      incident.respond_to?(attribute) && send(attribute) == incident.send(attribute)
    end
  end

  private

  # Splits "Destination <effect>" into [destination, effect].
  #
  # The split happens at the FIRST occurrence of the earliest effect phrase,
  # so an effect that repeats the phrase ("did not call at X and did not call
  # at Y") stays in one piece. When no known phrase is present the whole text
  # is the destination and the effect is '', with a warning unless the text is
  # a single word. When the phrase is not preceded by a space the result is
  # [nil, ''].
  def split_destination_and_effect(details, incident_text)
    phrase = earliest_effect_phrase(details)
    if phrase
      # Lazy (.*?) makes the split land on the first occurrence of the phrase.
      parts = /(.*?) (#{Regexp.escape(phrase)}.*)/.match(details)
      return parts ? [parts[1], parts[2]] : [nil, '']
    end

    unless details.split(' ').length == 1
      warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
    end
    [details[/.*/], '']
  end

  # Returns the known effect phrase that appears earliest in +details+, or
  # nil when none of them appears.
  def earliest_effect_phrase(details)
    present = EFFECT_PHRASES.select { |phrase| details.include?(phrase) }
    present.min_by { |phrase| details.index(phrase) }
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'affected_service'
|
4
|
+
|
5
|
+
# Extracts the affected train services from a fragment of report HTML.
#
# Every text node containing an "hh:mm" time is treated as one affected
# service; the nearest preceding <strong> element supplies the reason for the
# disruption.
class AffectedServicesReport

  # Text from the first "hh:mm" time up to the end of that line.
  INCIDENT_PATTERN = /\d\d:\d\d.*/

  # html - String of HTML. It is no longer modified in place, so frozen or
  #        shared strings are safe to pass.
  def initialize(html)
    # NOTE(review): the pattern below reads as a plain space replaced by a
    # plain space. It was probably "&nbsp;" or a non-breaking space character
    # before this listing was extracted - confirm against the original file.
    @doc = Hpricot(html.gsub(/ /, ' '))
  end

  # Returns an Array of AffectedService, one per text node that contains a
  # time, in the order the nodes are returned by the document search.
  def affected_services
    services = []
    @doc.search('*').each do |elem|
      next unless elem.text?

      incident_text = elem.inner_text[INCIDENT_PATTERN]
      next unless incident_text

      services << AffectedService.new(reason_for(elem, incident_text), incident_text.strip)
    end
    services
  end

  private

  # Stripped text of the <strong> element preceding +elem+. Warns and returns
  # '' when there is none, rather than failing with NoMethodError on nil.
  def reason_for(elem, incident_text)
    heading = find_previous_strong_element(elem)
    return heading.inner_text.strip if heading

    warn "Warning. Cannot find reason for disruption: '#{incident_text.strip}'"
    ''
  end

  # Walks backwards through +previous+ links, starting with +elem+ itself,
  # and returns the first node named 'strong', or nil once +previous+
  # returns nil.
  def find_previous_strong_element(elem)
    elem = elem.previous until elem.nil? || (elem.respond_to?(:name) && elem.name == 'strong')
    elem
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'affected_services_report'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
# Wraps the HTML of one daily performance report page and exposes its date,
# its headline figures and the affected services it lists.
class DailyPerformanceReport

  # Year assumed for report dates when the caller does not supply one. The
  # heading patterns below capture only a day name, a day number and a month.
  DEFAULT_YEAR = 2010

  # "<day name> <day of month> <month name...>" as found in the page heading.
  HEADING_DATE_PATTERN = /(.+?) (\d+) (.+)/

  # html - String containing the whole report page.
  def initialize(html)
    @doc = Hpricot(html)
  end

  # Returns the report's Date, read from the <h1> text or, failing that, the
  # <h2> text.
  #
  # year - year to combine with the day and month found in the heading
  #        (defaults to DEFAULT_YEAR).
  #
  # Raises ArgumentError when neither heading contains a date.
  def date(year = DEFAULT_YEAR)
    # "?" characters in the <h1> are treated as spaces before matching.
    # NOTE(review): presumably these are mis-encoded spaces - confirm.
    heading = (@doc/'h1').inner_text.tr('?', ' ')
    parts = HEADING_DATE_PATTERN.match(heading) ||
            HEADING_DATE_PATTERN.match((@doc/'h2').inner_text)
    raise ArgumentError, 'Cannot find the report date in the <h1> or <h2> heading' unless parts

    day, month_name = parts[2], parts[3]
    # Only the first three letters of the month name are handed to Date.parse.
    Date.parse([day, month_name[0..2].downcase, year].join('-'))
  end

  # Number of services the report says were scheduled (0 when not stated).
  def scheduled_services
    figure_from_report(/(\d+) train services were scheduled/)
  end

  # Number of services the report says ran (0 when not stated).
  def actual_services
    figure_from_report(/of which (\d+) ran/)
  end

  # Percentage of services that ran within five minutes of schedule
  # (0 when not stated).
  def services_within_five_minutes_of_schedule
    figure_from_report(/(\d+)% of services ran within 5 minutes of schedule/)
  end

  # Returns the AffectedService objects found in the report container.
  def affected_services
    AffectedServicesReport.new(report_container.inner_html).affected_services
  end

  # Returns one CSV line per affected service, joined with "\n" and without a
  # trailing newline; '' when there are no affected services.
  #
  # year - passed through to #date (defaults to DEFAULT_YEAR).
  def to_csv(year = DEFAULT_YEAR)
    services = affected_services
    # As before, the date is not needed (or parsed) when there are no rows.
    return '' if services.empty?

    report_date = date(year) # same for every row, so parse it once
    lines = services.map do |service|
      row = [
        report_date, service.reason_for_disruption, service.scheduled_start_time,
        service.scheduled_start_station, service.scheduled_destination_station,
        service.effect_on_service
      ]
      # Some CSV implementations append a row separator to each generated
      # line; chomp it so that joining with "\n" never produces blank lines.
      CSV.generate_line(row).chomp
    end
    lines.join("\n")
  end

  private

  # Integer captured by the first group of +pattern+ in the report text,
  # or 0 when the pattern is absent.
  def figure_from_report(pattern)
    report[pattern, 1].to_i
  end

  # Plain text of the report container.
  def report
    report_container.inner_text
  end

  # The node following the parent of the first <h1>.
  def report_container
    (@doc/'h1').first.parent.next_sibling
  end

end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: southeastern-daily-performance
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Roos
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-05 00:00:00 +00:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: chris@seagul.co.uk
|
18
|
+
executables:
|
19
|
+
- sedpr-to-csv
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- Rakefile
|
27
|
+
- combined.csv
|
28
|
+
- lib/sedpr/affected_service.rb
|
29
|
+
- lib/sedpr/affected_services_report.rb
|
30
|
+
- lib/sedpr/daily_performance_report.rb
|
31
|
+
- lib/sedpr.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/chrisroos/southeastern-daily-performance
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --main
|
39
|
+
- README
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: "0"
|
47
|
+
version:
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project: southeastern-daily-performance
|
57
|
+
rubygems_version: 1.3.5
|
58
|
+
signing_key:
|
59
|
+
specification_version: 3
|
60
|
+
summary: Converts Southeastern Daily Performance reports from HTML to CSV
|
61
|
+
test_files: []
|
62
|
+
|