southeastern-daily-performance 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ $: << File.join(File.dirname(__FILE__), 'sedpr')
2
+
3
+ require 'daily_performance_report'
@@ -0,0 +1,41 @@
1
# One train service named in a disruption report.
#
# Built from the reason for the disruption plus one line of incident text shaped
# like "<hh:mm> <start station> <-|to|?> <destination> <effect on service>",
# for example "10:00 London - Dover cancelled".
class AffectedService
  # Phrases that mark where the destination ends and the effect on the service begins.
  EFFECTS = [
    'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
  ].freeze

  # Captures: (1) scheduled time, (2) start station, (3) destination plus effect.
  SERVICE_DETAILS = /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/

  # Readers that take part in equality.
  ATTRIBUTES = [
    :reason_for_disruption,
    :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station,
    :effect_on_service
  ].freeze

  attr_reader(*ATTRIBUTES)

  # reason_for_disruption - stored as given.
  # incident_text         - the line describing the service. When it cannot be
  #                         parsed a warning is printed and every attribute
  #                         except reason_for_disruption stays nil.
  def initialize(reason_for_disruption, incident_text)
    @reason_for_disruption = reason_for_disruption

    details = SERVICE_DETAILS.match(incident_text.to_s)
    unless details
      warn "Warning. Cannot parse service details: '#{incident_text}'"
      return
    end

    @scheduled_start_time = details[1]
    # Station names keep only ASCII letters and spaces.
    @scheduled_start_station = details[2].gsub(/[^a-zA-Z ]/, '')
    @scheduled_destination_station, @effect_on_service =
      split_destination_and_effect(details[3], incident_text)
  end

  # Two services are equal when all five attributes are equal. Anything that
  # does not expose those readers (nil, a String, ...) is simply not equal,
  # rather than raising NoMethodError.
  def ==(incident)
    ATTRIBUTES.all? do |attribute|
      incident.respond_to?(attribute) && send(attribute) == incident.send(attribute)
    end
  end

  private

  # Splits "<destination> <effect>" into [destination, effect].
  #
  # The effect starts at whichever EFFECTS phrase occurs earliest in the text.
  # Without any known phrase the whole text is the destination and the effect
  # is ''; a warning is printed unless the text is exactly one word.
  def split_destination_and_effect(text, incident_text)
    effect = EFFECTS.select { |phrase| text.include?(phrase) }.
                     min_by { |phrase| text.index(phrase) }

    if effect
      # The phrase must be preceded by a space; when it is not (e.g. the text
      # starts with the phrase) nothing matches and the destination is nil
      # with an empty effect.
      parts = /(.*) (#{Regexp.escape(effect)}.*)/.match(text)
      return parts ? [parts[1], parts[2]] : [nil, '']
    end

    unless text.split(' ').length == 1
      warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
    end
    [text, '']
  end
end
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'affected_service'
4
+
5
# Builds AffectedService records from the HTML of a disruption report.
class AffectedServicesReport
  # An hh:mm time followed by the rest of that line.
  INCIDENT_TEXT = /\d\d:\d\d.*/

  # html - String of report markup. The caller's string is left untouched (so
  #        frozen strings are accepted); a copy with every "&nbsp;" entity
  #        replaced by a space is what gets parsed.
  def initialize(html)
    @doc = Hpricot(html.gsub(/&nbsp;/, ' '))
  end

  # Returns an Array with one AffectedService per text node that contains an
  # hh:mm time. The reason for each is the text of the <strong> element found
  # by walking backwards from that text node.
  def affected_services
    @doc.search('*').collect do |elem|
      next unless elem.text?

      incident_text = elem.inner_text[INCIDENT_TEXT]
      next unless incident_text

      reason = find_previous_strong_element(elem).inner_text
      AffectedService.new(reason.strip, incident_text.strip)
    end.compact
  end

  private

  # Follows `previous` links from elem until reaching a node named 'strong'
  # (elem itself counts). Iterative, so a long run of nodes cannot exhaust the
  # stack. Raises NoMethodError if the chain runs out before one is found.
  def find_previous_strong_element(elem)
    elem = elem.previous until elem.respond_to?(:name) && elem.name == 'strong'
    elem
  end
end
@@ -0,0 +1,55 @@
1
+ require 'rubygems'
2
+ require 'hpricot'
3
+ require 'affected_services_report'
4
+ require 'csv'
5
+
6
# Reads headline figures and affected services out of the HTML of a daily
# performance report and can render the affected services as CSV.
class DailyPerformanceReport
  # "<day name> <day of month> <month name>", e.g. "Friday 5 February".
  DATE_HEADING = /(.+?) (\d+) (.+)/

  # html - String of report markup, parsed with Hpricot.
  def initialize(html)
    @doc = Hpricot(html)
  end

  # Returns the report's Date, read from the <h1> text (with any '?' turned
  # into a space first) or, failing that, from the <h2> text. Only the day and
  # month come from the heading; the year is fixed at 2010.
  #
  # Raises ArgumentError when neither heading looks like a date.
  def date
    h1_text = (@doc/'h1').inner_text.gsub(/\?/, ' ')
    heading = DATE_HEADING.match(h1_text) || DATE_HEADING.match((@doc/'h2').inner_text)
    raise ArgumentError, 'Cannot find the report date in the h1 or h2 heading' unless heading

    day, month_name = heading[2], heading[3]
    Date.parse([day, month_name[0..2].downcase, '2010'].join('-'))
  end

  # Number from "<n> train services were scheduled", or 0 when absent.
  def scheduled_services
    report[/(\d+) train services were scheduled/, 1].to_i
  end

  # Number from "of which <n> ran", or 0 when absent.
  def actual_services
    report[/of which (\d+) ran/, 1].to_i
  end

  # Percentage from "<n>% of services ran within 5 minutes of schedule", or 0 when absent.
  def services_within_five_minutes_of_schedule
    report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
  end

  # Returns the Array of AffectedService built from the report container's markup.
  def affected_services
    AffectedServicesReport.new(report_container.inner_html).affected_services
  end

  # Returns one CSV row per affected service (date, reason, start time, start
  # station, destination station, effect), rows separated by a single "\n" and
  # no trailing newline. Returns '' when there are no affected services.
  def to_csv
    services = affected_services
    # The date is only needed (and only parsed) when there is a row to write.
    return '' if services.empty?

    report_date = date
    services.collect do |service|
      row = [
        report_date, service.reason_for_disruption, service.scheduled_start_time,
        service.scheduled_start_station, service.scheduled_destination_station,
        service.effect_on_service
      ]
      # generate_line may already end the row with a separator; drop it so the
      # join below does not produce blank lines between rows.
      CSV.generate_line(row).chomp
    end.join("\n")
  end

  private

  # Plain text of the report container.
  def report
    report_container.inner_text
  end

  # The node following the parent of the first <h1>.
  def report_container
    (@doc/'h1').first.parent.next_sibling
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: southeastern-daily-performance
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Chris Roos
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-05 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: chris@seagul.co.uk
18
+ executables:
19
+ - sedpr-to-csv
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README
24
+ files:
25
+ - README
26
+ - Rakefile
27
+ - combined.csv
28
+ - lib/sedpr/affected_service.rb
29
+ - lib/sedpr/affected_services_report.rb
30
+ - lib/sedpr/daily_performance_report.rb
31
+ - lib/sedpr.rb
32
+ has_rdoc: true
33
+ homepage: http://github.com/chrisroos/southeastern-daily-performance
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --main
39
+ - README
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project: southeastern-daily-performance
57
+ rubygems_version: 1.3.5
58
+ signing_key:
59
+ specification_version: 3
60
+ summary: Converts Southeastern Daily Performance reports from HTML to CSV
61
+ test_files: []
62
+