southeastern-daily-performance 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +19 -0
- data/Rakefile +24 -24
- data/bin/sedpr-to-csv +2 -2
- data/lib/sedpr.rb +3 -3
- data/lib/sedpr/affected_service.rb +35 -33
- data/lib/sedpr/affected_services_report.rb +20 -20
- data/lib/sedpr/daily_performance_report.rb +41 -39
- metadata +28 -10
- data/README +0 -18
data/README.md
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
## Installation
|
2
|
+
|
3
|
+
$ gem install southeastern-daily-performance -r http://gemcutter.org
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
$ sedpr-to-csv <location-of-html>
|
8
|
+
|
9
|
+
## Examples
|
10
|
+
|
11
|
+
### Explicitly download html and convert local file
|
12
|
+
|
13
|
+
$ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
|
14
|
+
$ sedpr-to-csv sedpr.html
|
15
|
+
|
16
|
+
|
17
|
+
### Implicitly download html and convert
|
18
|
+
|
19
|
+
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|
data/Rakefile
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
task :default => :test
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
3
|
+
require "rake/testtask"
|
4
|
+
Rake::TestTask.new do |t|
|
5
|
+
t.libs << "test"
|
6
|
+
t.test_files = FileList["test/**/*_test.rb"]
|
7
|
+
t.verbose = true
|
8
|
+
end
|
9
|
+
|
10
|
+
require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
|
11
|
+
task 'convert' do
|
12
|
+
if data_dir = ENV['DATA_DIR']
|
13
|
+
Dir[File.join(data_dir, '*.html')].each do |html_file|
|
14
|
+
html = File.read(html_file)
|
15
|
+
puts DailyPerformanceReport.new(html).to_csv
|
16
|
+
end
|
17
|
+
else
|
18
|
+
puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
|
19
|
+
exit 1
|
20
|
+
end
|
21
|
+
end
|
22
22
|
|
23
23
|
require "rubygems"
|
24
24
|
require "rake/gempackagetask"
|
@@ -33,24 +33,24 @@ spec = Gem::Specification.new do |s|
|
|
33
33
|
|
34
34
|
# Change these as appropriate
|
35
35
|
s.name = "southeastern-daily-performance"
|
36
|
-
s.version = "0.0.2"
|
36
|
+
s.version = "0.0.3"
|
37
37
|
s.summary = "Converts Southeaster Daily Performance reports from HTML to CSV"
|
38
38
|
s.author = "Chris Roos"
|
39
39
|
s.email = "chris@seagul.co.uk"
|
40
40
|
s.homepage = "http://github.com/chrisroos/southeastern-daily-performance"
|
41
41
|
|
42
42
|
s.has_rdoc = true
|
43
|
-
s.extra_rdoc_files = %w(README)
|
44
|
-
s.rdoc_options = %w(--main README)
|
43
|
+
s.extra_rdoc_files = %w(README.md)
|
44
|
+
s.rdoc_options = %w(--main README.md)
|
45
45
|
|
46
46
|
# Add any extra files to include in the gem
|
47
|
-
s.files = %w(README Rakefile) + Dir.glob("{bin,test,lib/**/*}")
|
47
|
+
s.files = %w(README.md Rakefile) + Dir.glob("{bin,test,lib/**/*}")
|
48
48
|
s.executables = FileList["bin/**"].map { |f| File.basename(f) }
|
49
49
|
s.require_paths = ["lib"]
|
50
50
|
|
51
51
|
# If you want to depend on other gems, add them here, along with any
|
52
52
|
# relevant versions
|
53
|
-
|
53
|
+
s.add_dependency('hpricot')
|
54
54
|
|
55
55
|
# If your tests use any gems, include them here
|
56
56
|
# s.add_development_dependency("mocha") # for example
|
data/bin/sedpr-to-csv
CHANGED
data/lib/sedpr.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'daily_performance_report'
|
1
|
+
require File.expand_path '../sedpr/affected_service', __FILE__
|
2
|
+
require File.expand_path '../sedpr/affected_services_report', __FILE__
|
3
|
+
require File.expand_path '../sedpr/daily_performance_report', __FILE__
|
@@ -1,41 +1,43 @@
|
|
1
|
-
|
1
|
+
module SoutheasternDailyPerformance
|
2
|
+
class AffectedService
|
2
3
|
|
3
|
-
|
4
|
-
|
5
|
-
|
4
|
+
attr_reader :reason_for_disruption
|
5
|
+
attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
|
6
|
+
attr_reader :effect_on_service
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
19
|
-
if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
|
20
|
-
destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
|
21
|
-
else
|
22
|
-
unless destination_and_effect_on_service.split(' ').length == 1
|
23
|
-
warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
|
8
|
+
def initialize(reason_for_disruption, incident_text)
|
9
|
+
@reason_for_disruption = reason_for_disruption
|
10
|
+
if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
|
11
|
+
@scheduled_start_time, @scheduled_start_station = $1, $2
|
12
|
+
destination_and_effect_on_service = $3
|
13
|
+
@scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
|
14
|
+
reasons = [
|
15
|
+
'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
|
16
|
+
]
|
17
|
+
matches = reasons.collect do |reason|
|
18
|
+
destination_and_effect_on_service =~ /#{reason}/i
|
24
19
|
end
|
25
|
-
|
20
|
+
if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
|
21
|
+
destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
|
22
|
+
else
|
23
|
+
unless destination_and_effect_on_service.split(' ').length == 1
|
24
|
+
warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
|
25
|
+
end
|
26
|
+
destination_and_effect_on_service =~ /(.*)/
|
27
|
+
end
|
28
|
+
@scheduled_destination_station, @effect_on_service = $1, ($2||'')
|
29
|
+
else
|
30
|
+
warn "Warning. Cannot parse service details: '#{incident_text}'"
|
26
31
|
end
|
27
|
-
@scheduled_destination_station, @effect_on_service = $1, ($2||'')
|
28
|
-
else
|
29
|
-
warn "Warning. Cannot parse service details: '#{incident_text}'"
|
30
32
|
end
|
31
|
-
end
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
34
|
+
def ==(incident)
|
35
|
+
self.reason_for_disruption == incident.reason_for_disruption and
|
36
|
+
self.scheduled_start_time == incident.scheduled_start_time and
|
37
|
+
self.scheduled_start_station == incident.scheduled_start_station and
|
38
|
+
self.scheduled_destination_station == incident.scheduled_destination_station and
|
39
|
+
self.effect_on_service == incident.effect_on_service
|
40
|
+
end
|
40
41
|
|
42
|
+
end
|
41
43
|
end
|
@@ -1,28 +1,28 @@
|
|
1
|
-
require 'rubygems'
|
2
1
|
require 'hpricot'
|
3
|
-
require 'affected_service'
|
4
2
|
|
5
|
-
|
3
|
+
module SoutheasternDailyPerformance
|
4
|
+
class AffectedServicesReport
|
6
5
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
def initialize(html)
|
7
|
+
html.gsub!(/ /, ' ')
|
8
|
+
@doc = Hpricot(html)
|
9
|
+
end
|
11
10
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
11
|
+
def affected_services
|
12
|
+
@doc.search('*').collect do |elem|
|
13
|
+
if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
|
14
|
+
reason = find_previous_strong_element(elem).inner_text
|
15
|
+
AffectedService.new(reason.strip, incident_text.strip)
|
16
|
+
end
|
17
|
+
end.compact
|
18
|
+
end
|
20
19
|
|
21
|
-
|
20
|
+
private
|
22
21
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
def find_previous_strong_element(elem)
|
23
|
+
return elem if (elem.respond_to?(:name) and elem.name == 'strong')
|
24
|
+
find_previous_strong_element(elem.previous)
|
25
|
+
end
|
27
26
|
|
27
|
+
end
|
28
28
|
end
|
@@ -1,55 +1,57 @@
|
|
1
|
-
require 'rubygems'
|
2
1
|
require 'hpricot'
|
3
|
-
require 'affected_services_report'
|
4
2
|
require 'csv'
|
3
|
+
require 'date'
|
5
4
|
|
6
|
-
|
5
|
+
module SoutheasternDailyPerformance
|
6
|
+
class DailyPerformanceReport
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
def initialize(html)
|
9
|
+
html = html.gsub(%r! !, ' ')
|
10
|
+
@doc = Hpricot(html)
|
11
|
+
end
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
13
|
+
def date
|
14
|
+
date = (@doc/'h1').inner_text
|
15
|
+
date.gsub!(/\?/, ' ')
|
16
|
+
if date =~ /(.+?) (\d+) (.+)/
|
17
|
+
elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
|
18
|
+
end
|
19
|
+
day_name, day, month_name = $1, $2, $3
|
20
|
+
date = [day, month_name[0..2].downcase, '2010'].join('-')
|
21
|
+
Date.parse(date)
|
17
22
|
end
|
18
|
-
day_name, day, month_name = $1, $2, $3
|
19
|
-
date = [day, month_name[0..2].downcase, '2010'].join('-')
|
20
|
-
Date.parse(date)
|
21
|
-
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
24
|
+
def scheduled_services
|
25
|
+
report[/(\d+) train services were scheduled/, 1].to_i
|
26
|
+
end
|
26
27
|
|
27
|
-
|
28
|
-
|
29
|
-
|
28
|
+
def actual_services
|
29
|
+
report[/of which (\d+) ran/, 1].to_i
|
30
|
+
end
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
def services_within_five_minutes_of_schedule
|
33
|
+
report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
|
34
|
+
end
|
34
35
|
|
35
|
-
|
36
|
-
|
37
|
-
|
36
|
+
def affected_services
|
37
|
+
AffectedServicesReport.new(report_container.inner_html).affected_services
|
38
|
+
end
|
38
39
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
40
|
+
def to_csv
|
41
|
+
affected_services.collect do |service|
|
42
|
+
CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
|
43
|
+
end.join("\n")
|
44
|
+
end
|
44
45
|
|
45
|
-
|
46
|
+
private
|
46
47
|
|
47
|
-
|
48
|
-
|
49
|
-
|
48
|
+
def report
|
49
|
+
report_container.inner_text
|
50
|
+
end
|
50
51
|
|
51
|
-
|
52
|
-
|
53
|
-
|
52
|
+
def report_container
|
53
|
+
(@doc/'h1').first.parent.next_sibling
|
54
|
+
end
|
54
55
|
|
56
|
+
end
|
55
57
|
end
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: southeastern-daily-performance
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 3
|
9
|
+
version: 0.0.3
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Chris Roos
|
@@ -9,10 +14,21 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-10-09 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: hpricot
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 0
|
29
|
+
version: "0"
|
30
|
+
type: :runtime
|
31
|
+
version_requirements: *id001
|
16
32
|
description:
|
17
33
|
email: chris@seagul.co.uk
|
18
34
|
executables:
|
@@ -20,9 +36,9 @@ executables:
|
|
20
36
|
extensions: []
|
21
37
|
|
22
38
|
extra_rdoc_files:
|
23
|
-
- README
|
39
|
+
- README.md
|
24
40
|
files:
|
25
|
-
- README
|
41
|
+
- README.md
|
26
42
|
- Rakefile
|
27
43
|
- lib/sedpr/affected_service.rb
|
28
44
|
- lib/sedpr/affected_services_report.rb
|
@@ -35,25 +51,27 @@ licenses: []
|
|
35
51
|
post_install_message:
|
36
52
|
rdoc_options:
|
37
53
|
- --main
|
38
|
-
- README
|
54
|
+
- README.md
|
39
55
|
require_paths:
|
40
56
|
- lib
|
41
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
58
|
requirements:
|
43
59
|
- - ">="
|
44
60
|
- !ruby/object:Gem::Version
|
61
|
+
segments:
|
62
|
+
- 0
|
45
63
|
version: "0"
|
46
|
-
version:
|
47
64
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
65
|
requirements:
|
49
66
|
- - ">="
|
50
67
|
- !ruby/object:Gem::Version
|
68
|
+
segments:
|
69
|
+
- 0
|
51
70
|
version: "0"
|
52
|
-
version:
|
53
71
|
requirements: []
|
54
72
|
|
55
73
|
rubyforge_project: southeastern-daily-performance
|
56
|
-
rubygems_version: 1.3.
|
74
|
+
rubygems_version: 1.3.6
|
57
75
|
signing_key:
|
58
76
|
specification_version: 3
|
59
77
|
summary: Converts Southeaster Daily Performance reports from HTML to CSV
|
data/README
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
Installation
|
2
|
-
============
|
3
|
-
$ gem install southeastern-daily-performance -r http://gemcutter.org
|
4
|
-
|
5
|
-
|
6
|
-
Usage
|
7
|
-
=====
|
8
|
-
$ sedpr-to-csv <location-of-html>
|
9
|
-
|
10
|
-
|
11
|
-
Examples
|
12
|
-
========
|
13
|
-
# Explicitly download html and convert local file
|
14
|
-
$ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
|
15
|
-
$ sedpr-to-csv sedpr.html
|
16
|
-
|
17
|
-
# Implicitly download html and convert
|
18
|
-
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|