southeastern-daily-performance 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
+ $: << File.join(File.dirname(__FILE__), 'sedpr')
2
+
3
+ require 'daily_performance_report'
@@ -0,0 +1,41 @@
1
# A single train service affected by a disruption, parsed from one line of
# incident text such as "07:15 Hastings - Charing Cross cancelled".
#
# When the incident text cannot be parsed a warning is written via Kernel#warn
# and every attribute other than reason_for_disruption is left as nil.
class AffectedService

  # "<HH:MM> <start station> <separator> <destination and effect>", where the
  # separator is "-", "to" or a literal "?".
  # NOTE(review): the "?" alternative presumably covers a mis-encoded dash in
  # the source HTML - confirm against real reports.
  SERVICE_DETAILS = /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/

  # Phrases that mark where the destination ends and the effect on the
  # service begins.
  EFFECT_PHRASES = [
    'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
  ].freeze

  # Readers that together define equality between two services.
  ATTRIBUTES = [
    :reason_for_disruption,
    :scheduled_start_time,
    :scheduled_start_station,
    :scheduled_destination_station,
    :effect_on_service
  ].freeze

  attr_reader :reason_for_disruption
  attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
  attr_reader :effect_on_service

  # reason_for_disruption - stored as given.
  # incident_text         - the line describing the service; parsed into the
  #                         start time, start station, destination station and
  #                         effect on service.
  def initialize(reason_for_disruption, incident_text)
    @reason_for_disruption = reason_for_disruption
    if incident_text =~ SERVICE_DETAILS
      details = Regexp.last_match
      @scheduled_start_time = details[1]
      # Drop everything that is not an ASCII letter or a space from the name.
      @scheduled_start_station = details[2].gsub(/[^a-zA-Z ]/, '')
      @scheduled_destination_station, @effect_on_service =
        split_destination_and_effect(details[3], incident_text)
    else
      warn "Warning. Cannot parse service details: '#{incident_text}'"
    end
  end

  # Two services are equal when all five attributes are equal. Objects that do
  # not expose those attributes (nil, strings, ...) are simply not equal,
  # rather than raising NoMethodError.
  def ==(incident)
    ATTRIBUTES.all? do |attribute|
      incident.respond_to?(attribute) && send(attribute) == incident.send(attribute)
    end
  end

  private

  # Splits "<destination> <effect>" into [destination, effect].
  #
  # The effect starts at the effect phrase that appears earliest in the text.
  # The split is non-greedy so that a phrase occurring more than once (e.g.
  # "did not call at A and did not call at B") keeps the whole effect together
  # instead of leaking its first half into the destination.
  #
  # Returns [nil, ''] when the chosen phrase is never preceded by a space, and
  # [text, ''] (after a warning for multi-word text) when no phrase is present.
  def split_destination_and_effect(text, incident_text)
    positions = EFFECT_PHRASES.collect { |phrase| text.index(phrase) }
    earliest = positions.compact.min

    if earliest
      phrase = EFFECT_PHRASES[positions.index(earliest)]
      parts = /(.*?) (#{Regexp.escape(phrase)}.*)/.match(text)
      return [nil, ''] unless parts
      [parts[1], parts[2]]
    else
      # A single remaining word is taken to be just the destination; anything
      # else means the effect was not recognised.
      unless text.split(' ').length == 1
        warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
      end
      [text[/(.*)/, 1], '']
    end
  end

end
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'affected_service'
4
+
5
# Extracts the affected services listed in a fragment of report HTML.
class AffectedServicesReport

  # html - String of HTML. It is not modified: the "&nbsp;" entities are
  #        replaced with plain spaces in a copy before parsing, so callers may
  #        pass frozen or shared strings.
  def initialize(html)
    @doc = Hpricot(html.gsub(/&nbsp;/, ' '))
  end

  # Returns an Array of AffectedService, one for each text node whose text
  # contains an "HH:MM" time. The service's reason is the text of the nearest
  # preceding <strong> element, and its details are the node text from the
  # time onwards (both stripped).
  def affected_services
    services = []
    @doc.search('*').each do |elem|
      next unless elem.text?
      incident_text = elem.inner_text[/\d\d:\d\d.*/]
      next unless incident_text
      reason = find_previous_strong_element(elem).inner_text
      services << AffectedService.new(reason.strip, incident_text.strip)
    end
    services
  end

  private

  # Follows #previous links from elem until an element named 'strong' is
  # reached, and returns it (elem itself if it already is one).
  # NOTE(review): if the chain ends without a <strong>, #previous presumably
  # returns nil and this raises NoMethodError - same as before; confirm
  # whether reports can contain a time before any <strong> heading.
  def find_previous_strong_element(elem)
    elem = elem.previous until elem.respond_to?(:name) && elem.name == 'strong'
    elem
  end

end
@@ -0,0 +1,55 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'affected_services_report'
4
+ require 'csv'
5
+
6
# Reads the headline figures and the list of affected services out of a daily
# performance report page and can render the affected services as CSV.
class DailyPerformanceReport

  # Year assumed for report dates when none is given: the headings parsed by
  # #date supply only a day and a month name.
  DEFAULT_YEAR = 2010

  # "<day name> <day of month> <month name ...>"
  HEADING_DATE = /(.+?) (\d+) (.+)/

  # html - the report page HTML, handed to Hpricot.
  # year - year used when building the report date (defaults to 2010, which
  #        was previously hard-coded).
  def initialize(html, year = DEFAULT_YEAR)
    @doc = Hpricot(html)
    @year = year
  end

  # Returns the report date as a Date, built from the day and the first three
  # letters of the month name found in the <h1> text or, failing that, the
  # <h2> text, combined with the configured year.
  #
  # Raises ArgumentError when neither heading contains a recognisable date.
  def date
    # NOTE(review): "?" presumably stands in for a mis-encoded space in the
    # heading - confirm against real reports.
    heading = (@doc/'h1').inner_text.gsub(/\?/, ' ')
    parts = HEADING_DATE.match(heading) || HEADING_DATE.match((@doc/'h2').inner_text)
    raise ArgumentError, 'Cannot find the report date in the h1 or h2 heading' unless parts

    day, month_name = parts[2], parts[3]
    Date.parse([day, month_name[0..2].downcase, @year].join('-'))
  end

  # Number of scheduled services quoted in the report text (0 if absent).
  def scheduled_services
    report[/(\d+) train services were scheduled/, 1].to_i
  end

  # Number of services that ran, as quoted in the report text (0 if absent).
  def actual_services
    report[/of which (\d+) ran/, 1].to_i
  end

  # Percentage of services within 5 minutes of schedule (0 if absent).
  def services_within_five_minutes_of_schedule
    report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
  end

  # Returns the affected services parsed from the report container's HTML.
  def affected_services
    AffectedServicesReport.new(report_container.inner_html).affected_services
  end

  # Returns one CSV record per affected service, separated by "\n" with no
  # trailing newline. Columns: date, reason, start time, start station,
  # destination station, effect on service.
  def to_csv
    services = affected_services
    # Checked first so that #date is only evaluated when there are rows.
    return '' if services.empty?

    report_date = date
    rows = services.collect do |service|
      # CSV.generate_line appends a row separator on Ruby >= 1.9; chomp it so
      # that joining with "\n" does not produce blank lines between records.
      CSV.generate_line([
        report_date,
        service.reason_for_disruption,
        service.scheduled_start_time,
        service.scheduled_start_station,
        service.scheduled_destination_station,
        service.effect_on_service
      ]).chomp
    end
    rows.join("\n")
  end

  private

  # Plain text of the report container.
  def report
    report_container.inner_text
  end

  # The node following the parent of the first <h1>.
  # NOTE(review): assumes the report body always sits there - confirm against
  # the page structure.
  def report_container
    (@doc/'h1').first.parent.next_sibling
  end

end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: southeastern-daily-performance
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Roos
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-05 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: chris@seagul.co.uk
18
+ executables:
19
+ - sedpr-to-csv
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - README
26
+ - Rakefile
27
+ - combined.csv
28
+ - lib/sedpr/affected_service.rb
29
+ - lib/sedpr/affected_services_report.rb
30
+ - lib/sedpr/daily_performance_report.rb
31
+ - lib/sedpr.rb
32
+ has_rdoc: true
33
+ homepage: http://github.com/chrisroos/southeastern-daily-performance
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --main
39
+ - README
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project: southeastern-daily-performance
57
+ rubygems_version: 1.3.5
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Converts Southeastern Daily Performance reports from HTML to CSV
61
+ test_files: []
62
+