southeastern-daily-performance 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +25 -0
- data/Rakefile +128 -0
- data/bin/sedpr-to-csv +12 -0
- data/combined.csv +1210 -0
- data/lib/sedpr.rb +3 -0
- data/lib/sedpr/affected_service.rb +41 -0
- data/lib/sedpr/affected_services_report.rb +28 -0
- data/lib/sedpr/daily_performance_report.rb +55 -0
- metadata +62 -0
data/lib/sedpr.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# A single train service named in a disruption report, parsed from a line of
# free text such as "07:15 Hastings to Charing Cross cancelled".
class AffectedService

  # Phrases that mark where the effect-on-service text begins; whatever comes
  # before the phrase is treated as the destination station.
  EFFECT_PHRASES = [
    'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
  ].freeze

  # Readers that must all agree for two services to be considered equal.
  COMPARED_ATTRIBUTES = [
    :reason_for_disruption,
    :scheduled_start_time,
    :scheduled_start_station,
    :scheduled_destination_station,
    :effect_on_service
  ].freeze

  attr_reader :reason_for_disruption
  attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
  attr_reader :effect_on_service

  # reason_for_disruption - stored as given.
  # incident_text         - "<HH:MM> <start> (-|to|?) <destination> [<effect>]".
  #
  # When incident_text does not have that shape a warning is written with
  # Kernel#warn and every attribute except reason_for_disruption stays nil.
  def initialize(reason_for_disruption, incident_text)
    @reason_for_disruption = reason_for_disruption

    # Regexp#match (rather than String#=~) keeps a nil incident_text on the
    # "cannot parse" path and avoids the $1/$2 globals.
    details = /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/.match(incident_text)
    unless details
      warn "Warning. Cannot parse service details: '#{incident_text}'"
      return
    end

    @scheduled_start_time = details[1]
    # Keep only letters and spaces in the station name.
    @scheduled_start_station = details[2].gsub(/[^a-zA-Z ]/, '')
    @scheduled_destination_station, @effect_on_service =
      split_destination_and_effect(details[3], incident_text)
  end

  # Two services are equal when all five attributes match. Objects that do not
  # expose those readers (nil included) compare as not equal instead of
  # raising NoMethodError.
  def ==(incident)
    COMPARED_ATTRIBUTES.all? do |attribute|
      incident.respond_to?(attribute) &&
        public_send(attribute) == incident.public_send(attribute)
    end
  end

  private

  # Splits "<destination> <effect>" into [destination, effect].
  #
  # The known phrase that occurs earliest in the text decides which phrase is
  # used for the split. With no known phrase the whole text is the destination
  # and the effect is ''; a warning is written unless the text is one word.
  def split_destination_and_effect(text, incident_text)
    phrase = EFFECT_PHRASES.select { |candidate| text.include?(candidate) }
                           .min_by { |candidate| text.index(candidate) }

    if phrase
      # The phrase must follow a space; when it does not (e.g. the text starts
      # with the phrase) the split fails and no destination is recorded.
      parts = /(.*) (#{phrase}.*)/.match(text)
      return parts ? [parts[1], parts[2]] : [nil, '']
    end

    unless text.split(' ').length == 1
      warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
    end
    [text, '']
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'affected_service'
|
4
|
+
|
5
|
+
# Pulls the individual affected services out of the HTML of a disruption
# report: every text node containing an HH:MM time becomes one AffectedService,
# labelled with the text of the nearest preceding <strong> element.
class AffectedServicesReport

  # html - the report markup as a String. It is copied before substitution, so
  #        the caller's string is left untouched and may be frozen.
  def initialize(html)
    # NOTE(review): as visible here both the pattern and the replacement are a
    # plain space, which makes this substitution a no-op; presumably the
    # pattern was originally a non-breaking space - confirm against the
    # upstream file.
    @doc = Hpricot(html.gsub(/ /, ' '))
  end

  # Returns an Array of AffectedService, one per text node that contains a
  # time. The reason is '' when no <strong> element precedes the node.
  def affected_services
    @doc.search('*').map do |elem|
      next unless elem.text?

      incident_text = elem.inner_text[/\d\d:\d\d.*/]
      next unless incident_text

      heading = find_previous_strong_element(elem)
      reason = heading ? heading.inner_text : ''
      AffectedService.new(reason.strip, incident_text.strip)
    end.compact
  end

  private

  # Follows #previous from elem (inclusive) until a node named 'strong' is
  # found. Returns nil when the chain ends first, rather than calling
  # #previous on nil.
  def find_previous_strong_element(elem)
    node = elem
    while node
      return node if node.respond_to?(:name) && node.name == 'strong'
      node = node.previous
    end
    nil
  end

end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'affected_services_report'
|
4
|
+
require 'csv'
|
5
|
+
|
6
|
+
# One day's performance report page: exposes the report date, the headline
# service counts and the list of affected services, and renders the affected
# services as CSV.
class DailyPerformanceReport

  # "<day name> <day of month> <month name...>", e.g. "Thursday 4 February".
  DATE_PATTERN = /(.+?) (\d+) (.+)/

  # The year is not taken from the heading; every report is dated in this year.
  REPORT_YEAR = '2010'

  # html - the markup of the report page.
  def initialize(html)
    @doc = Hpricot(html)
  end

  # Returns the report Date, read from the h1 text (with '?' treated as a
  # space) or, when that does not match, from the h2 text.
  #
  # Raises ArgumentError when neither heading contains a date.
  def date
    heading = (@doc/'h1').inner_text.gsub(/\?/, ' ')
    parts = DATE_PATTERN.match(heading) || DATE_PATTERN.match((@doc/'h2').inner_text)
    raise ArgumentError, 'Cannot find the report date in the h1 or h2 heading' unless parts

    # Only the first three letters of the month name are used.
    Date.parse([parts[2], parts[3][0..2].downcase, REPORT_YEAR].join('-'))
  end

  # Number of services scheduled, or 0 when the sentence is absent.
  def scheduled_services
    report[/(\d+) train services were scheduled/, 1].to_i
  end

  # Number of services that ran, or 0 when the sentence is absent.
  def actual_services
    report[/of which (\d+) ran/, 1].to_i
  end

  # Whole-number percentage of services within 5 minutes of schedule, or 0
  # when the sentence is absent.
  def services_within_five_minutes_of_schedule
    report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
  end

  # Returns whatever AffectedServicesReport extracts from the report container.
  def affected_services
    AffectedServicesReport.new(report_container.inner_html).affected_services
  end

  # Returns one CSV row per affected service, separated by "\n" with no
  # trailing newline. Returns '' when there are no affected services.
  def to_csv
    report_date = nil
    affected_services.map do |service|
      # Resolved on first use so a report without services never needs a date.
      report_date ||= date
      row = [
        report_date,
        service.reason_for_disruption,
        service.scheduled_start_time,
        service.scheduled_start_station,
        service.scheduled_destination_station,
        service.effect_on_service
      ]
      # generate_line appends the row separator on Ruby 1.9+; drop it so the
      # join below does not leave a blank line between rows.
      CSV.generate_line(row).chomp
    end.join("\n")
  end

  private

  # Plain text of the report container.
  def report
    report_container.inner_text
  end

  # The node that follows the parent of the first h1.
  def report_container
    (@doc/'h1').first.parent.next_sibling
  end

end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: southeastern-daily-performance
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Chris Roos
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-05 00:00:00 +00:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: chris@seagul.co.uk
|
18
|
+
executables:
|
19
|
+
- sedpr-to-csv
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- README
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- Rakefile
|
27
|
+
- combined.csv
|
28
|
+
- lib/sedpr/affected_service.rb
|
29
|
+
- lib/sedpr/affected_services_report.rb
|
30
|
+
- lib/sedpr/daily_performance_report.rb
|
31
|
+
- lib/sedpr.rb
|
32
|
+
has_rdoc: true
|
33
|
+
homepage: http://github.com/chrisroos/southeastern-daily-performance
|
34
|
+
licenses: []
|
35
|
+
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --main
|
39
|
+
- README
|
40
|
+
require_paths:
|
41
|
+
- lib
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: "0"
|
47
|
+
version:
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project: southeastern-daily-performance
|
57
|
+
rubygems_version: 1.3.5
|
58
|
+
signing_key:
|
59
|
+
specification_version: 3
|
60
|
+
summary: Converts Southeastern Daily Performance reports from HTML to CSV
|
61
|
+
test_files: []
|
62
|
+
|