southeastern-daily-performance 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -0
- data/Rakefile +24 -24
- data/bin/sedpr-to-csv +2 -2
- data/lib/sedpr.rb +3 -3
- data/lib/sedpr/affected_service.rb +35 -33
- data/lib/sedpr/affected_services_report.rb +20 -20
- data/lib/sedpr/daily_performance_report.rb +41 -39
- metadata +28 -10
- data/README +0 -18
data/README.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
## Installation
|
2
|
+
|
3
|
+
$ gem install southeastern-daily-performance -r http://gemcutter.org
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
$ sedpr-to-csv <location-of-html>
|
8
|
+
|
9
|
+
## Examples
|
10
|
+
|
11
|
+
### Explicitly download html and convert local file
|
12
|
+
|
13
|
+
$ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
|
14
|
+
$ sedpr-to-csv sedpr.html
|
15
|
+
|
16
|
+
|
17
|
+
### Implicitly download html and convert
|
18
|
+
|
19
|
+
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|
data/Rakefile
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
task :default => :test
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
3
|
+
require "rake/testtask"
|
4
|
+
Rake::TestTask.new do |t|
|
5
|
+
t.libs << "test"
|
6
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
7
|
+
t.verbose = true
|
8
|
+
end
|
9
|
+
|
10
|
+
require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
|
11
|
+
task 'convert' do
|
12
|
+
if data_dir = ENV['DATA_DIR']
|
13
|
+
Dir[File.join(data_dir, '*.html')].each do |html_file|
|
14
|
+
html = File.read(html_file)
|
15
|
+
puts DailyPerformanceReport.new(html).to_csv
|
16
|
+
end
|
17
|
+
else
|
18
|
+
puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
|
19
|
+
exit 1
|
20
|
+
end
|
21
|
+
end
|
22
22
|
|
23
23
|
require "rubygems"
|
24
24
|
require "rake/gempackagetask"
|
@@ -33,24 +33,24 @@ spec = Gem::Specification.new do |s|
|
|
33
33
|
|
34
34
|
# Change these as appropriate
|
35
35
|
s.name = "southeastern-daily-performance"
|
36
|
-
s.version = "0.0.2"
|
36
|
+
s.version = "0.0.3"
|
37
37
|
s.summary = "Converts Southeaster Daily Performance reports from HTML to CSV"
|
38
38
|
s.author = "Chris Roos"
|
39
39
|
s.email = "chris@seagul.co.uk"
|
40
40
|
s.homepage = "http://github.com/chrisroos/southeastern-daily-performance"
|
41
41
|
|
42
42
|
s.has_rdoc = true
|
43
|
-
s.extra_rdoc_files = %w(README)
|
44
|
-
s.rdoc_options = %w(--main README)
|
43
|
+
s.extra_rdoc_files = %w(README.md)
|
44
|
+
s.rdoc_options = %w(--main README.md)
|
45
45
|
|
46
46
|
# Add any extra files to include in the gem
|
47
|
-
s.files = %w(README Rakefile) + Dir.glob("{bin,test,lib/**/*}")
|
47
|
+
s.files = %w(README.md Rakefile) + Dir.glob("{bin,test,lib/**/*}")
|
48
48
|
s.executables = FileList["bin/**"].map { |f| File.basename(f) }
|
49
49
|
s.require_paths = ["lib"]
|
50
50
|
|
51
51
|
# If you want to depend on other gems, add them here, along with any
|
52
52
|
# relevant versions
|
53
|
-
|
53
|
+
s.add_dependency('hpricot')
|
54
54
|
|
55
55
|
# If your tests use any gems, include them here
|
56
56
|
# s.add_development_dependency("mocha") # for example
|
data/bin/sedpr-to-csv
CHANGED
data/lib/sedpr.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'daily_performance_report'
|
1
|
+
require File.expand_path '../sedpr/affected_service', __FILE__
|
2
|
+
require File.expand_path '../sedpr/affected_services_report', __FILE__
|
3
|
+
require File.expand_path '../sedpr/daily_performance_report', __FILE__
|
@@ -1,41 +1,43 @@
|
|
1
|
-
|
1
|
+
module SoutheasternDailyPerformance
|
2
|
+
class AffectedService
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
attr_reader :reason_for_disruption
|
5
|
+
attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
|
6
|
+
attr_reader :effect_on_service
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
19
|
-
if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
|
20
|
-
destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
|
21
|
-
else
|
22
|
-
unless destination_and_effect_on_service.split(' ').length == 1
|
23
|
-
warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
|
8
|
+
def initialize(reason_for_disruption, incident_text)
|
9
|
+
@reason_for_disruption = reason_for_disruption
|
10
|
+
if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
|
11
|
+
@scheduled_start_time, @scheduled_start_station = $1, $2
|
12
|
+
destination_and_effect_on_service = $3
|
13
|
+
@scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
|
14
|
+
reasons = [
|
15
|
+
'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
|
16
|
+
]
|
17
|
+
matches = reasons.collect do |reason|
|
18
|
+
destination_and_effect_on_service =~ /#{reason}/i
|
24
19
|
end
|
25
|
-
|
20
|
+
if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
|
21
|
+
destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
|
22
|
+
else
|
23
|
+
unless destination_and_effect_on_service.split(' ').length == 1
|
24
|
+
warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
|
25
|
+
end
|
26
|
+
destination_and_effect_on_service =~ /(.*)/
|
27
|
+
end
|
28
|
+
@scheduled_destination_station, @effect_on_service = $1, ($2||'')
|
29
|
+
else
|
30
|
+
warn "Warning. Cannot parse service details: '#{incident_text}'"
|
26
31
|
end
|
27
|
-
@scheduled_destination_station, @effect_on_service = $1, ($2||'')
|
28
|
-
else
|
29
|
-
warn "Warning. Cannot parse service details: '#{incident_text}'"
|
30
32
|
end
|
31
|
-
end
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
def ==(incident)
|
35
|
+
self.reason_for_disruption == incident.reason_for_disruption and
|
36
|
+
self.scheduled_start_time == incident.scheduled_start_time and
|
37
|
+
self.scheduled_start_station == incident.scheduled_start_station and
|
38
|
+
self.scheduled_destination_station == incident.scheduled_destination_station and
|
39
|
+
self.effect_on_service == incident.effect_on_service
|
40
|
+
end
|
40
41
|
|
42
|
+
end
|
41
43
|
end
|
@@ -1,28 +1,28 @@
|
|
1
|
-
require 'rubygems'
|
2
1
|
require 'hpricot'
|
3
|
-
require 'affected_service'
|
4
2
|
|
5
|
-
|
3
|
+
module SoutheasternDailyPerformance
|
4
|
+
class AffectedServicesReport
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
def initialize(html)
|
7
|
+
html.gsub!(/ /, ' ')
|
8
|
+
@doc = Hpricot(html)
|
9
|
+
end
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
def affected_services
|
12
|
+
@doc.search('*').collect do |elem|
|
13
|
+
if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
|
14
|
+
reason = find_previous_strong_element(elem).inner_text
|
15
|
+
AffectedService.new(reason.strip, incident_text.strip)
|
16
|
+
end
|
17
|
+
end.compact
|
18
|
+
end
|
20
19
|
|
21
|
-
|
20
|
+
private
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
def find_previous_strong_element(elem)
|
23
|
+
return elem if (elem.respond_to?(:name) and elem.name == 'strong')
|
24
|
+
find_previous_strong_element(elem.previous)
|
25
|
+
end
|
27
26
|
|
27
|
+
end
|
28
28
|
end
|
@@ -1,55 +1,57 @@
|
|
1
|
-
require 'rubygems'
|
2
1
|
require 'hpricot'
|
3
|
-
require 'affected_services_report'
|
4
2
|
require 'csv'
|
3
|
+
require 'date'
|
5
4
|
|
6
|
-
|
5
|
+
module SoutheasternDailyPerformance
|
6
|
+
class DailyPerformanceReport
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
def initialize(html)
|
9
|
+
html = html.gsub(%r! !, ' ')
|
10
|
+
@doc = Hpricot(html)
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
def date
|
14
|
+
date = (@doc/'h1').inner_text
|
15
|
+
date.gsub!(/\?/, ' ')
|
16
|
+
if date =~ /(.+?) (\d+) (.+)/
|
17
|
+
elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
|
18
|
+
end
|
19
|
+
day_name, day, month_name = $1, $2, $3
|
20
|
+
date = [day, month_name[0..2].downcase, '2010'].join('-')
|
21
|
+
Date.parse(date)
|
17
22
|
end
|
18
|
-
day_name, day, month_name = $1, $2, $3
|
19
|
-
date = [day, month_name[0..2].downcase, '2010'].join('-')
|
20
|
-
Date.parse(date)
|
21
|
-
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
def scheduled_services
|
25
|
+
report[/(\d+) train services were scheduled/, 1].to_i
|
26
|
+
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
def actual_services
|
29
|
+
report[/of which (\d+) ran/, 1].to_i
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def services_within_five_minutes_of_schedule
|
33
|
+
report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
|
34
|
+
end
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
36
|
+
def affected_services
|
37
|
+
AffectedServicesReport.new(report_container.inner_html).affected_services
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
def to_csv
|
41
|
+
affected_services.collect do |service|
|
42
|
+
CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
|
43
|
+
end.join("\n")
|
44
|
+
end
|
44
45
|
|
45
|
-
|
46
|
+
private
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
def report
|
49
|
+
report_container.inner_text
|
50
|
+
end
|
50
51
|
|
51
|
-
|
52
|
-
|
53
|
-
|
52
|
+
def report_container
|
53
|
+
(@doc/'h1').first.parent.next_sibling
|
54
|
+
end
|
54
55
|
|
56
|
+
end
|
55
57
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: southeastern-daily-performance
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
version: 0.0.3
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Chris Roos
|
@@ -9,10 +14,21 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-10-09 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: hpricot
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :runtime
|
31
|
+
version_requirements: *id001
|
16
32
|
description:
|
17
33
|
email: chris@seagul.co.uk
|
18
34
|
executables:
|
@@ -20,9 +36,9 @@ executables:
|
|
20
36
|
extensions: []
|
21
37
|
|
22
38
|
extra_rdoc_files:
|
23
|
-
- README
|
39
|
+
- README.md
|
24
40
|
files:
|
25
|
-
- README
|
41
|
+
- README.md
|
26
42
|
- Rakefile
|
27
43
|
- lib/sedpr/affected_service.rb
|
28
44
|
- lib/sedpr/affected_services_report.rb
|
@@ -35,25 +51,27 @@ licenses: []
|
|
35
51
|
post_install_message:
|
36
52
|
rdoc_options:
|
37
53
|
- --main
|
38
|
-
- README
|
54
|
+
- README.md
|
39
55
|
require_paths:
|
40
56
|
- lib
|
41
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
58
|
requirements:
|
43
59
|
- - ">="
|
44
60
|
- !ruby/object:Gem::Version
|
61
|
+
segments:
|
62
|
+
- 0
|
45
63
|
version: "0"
|
46
|
-
version:
|
47
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
65
|
requirements:
|
49
66
|
- - ">="
|
50
67
|
- !ruby/object:Gem::Version
|
68
|
+
segments:
|
69
|
+
- 0
|
51
70
|
version: "0"
|
52
|
-
version:
|
53
71
|
requirements: []
|
54
72
|
|
55
73
|
rubyforge_project: southeastern-daily-performance
|
56
|
-
rubygems_version: 1.3.
|
74
|
+
rubygems_version: 1.3.6
|
57
75
|
signing_key:
|
58
76
|
specification_version: 3
|
59
77
|
summary: Converts Southeaster Daily Performance reports from HTML to CSV
|
data/README
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
Installation
|
2
|
-
============
|
3
|
-
$ gem install southeastern-daily-performance -r http://gemcutter.org
|
4
|
-
|
5
|
-
|
6
|
-
Usage
|
7
|
-
=====
|
8
|
-
$ sedpr-to-csv <location-of-html>
|
9
|
-
|
10
|
-
|
11
|
-
Examples
|
12
|
-
========
|
13
|
-
# Explicitly download html and convert local file
|
14
|
-
$ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
|
15
|
-
$ sedpr-to-csv sedpr.html
|
16
|
-
|
17
|
-
# Implicitly download html and convert
|
18
|
-
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|