southeastern-daily-performance 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +27 -2
- data/Rakefile +3 -59
- data/bin/convert-all-html-data +30 -0
- data/bin/sedpr-to-csv +9 -3
- data/lib/southeastern_daily_performance.rb +3 -0
- data/lib/{sedpr → southeastern_daily_performance}/affected_service.rb +2 -1
- data/lib/{sedpr → southeastern_daily_performance}/affected_services_report.rb +0 -0
- data/lib/{sedpr → southeastern_daily_performance}/daily_performance_report.rb +15 -7
- metadata +32 -8
- data/lib/sedpr.rb +0 -3
data/README.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
## Intro
|
2
|
+
|
3
|
+
Southeastern publish [Daily Performance reports](http://www.southeasternrailway.co.uk/your-journey/daily-performance/). They're not very usable as they are. This project currently contains the code I've used to parse that data and the parsed data itself.
|
4
|
+
|
5
|
+
I've also popped the data into [this Google Fusion table](http://www.google.com/fusiontables/DataSource?dsrcid=359310) and added a bit of a [query tool](http://chrisroos.github.com/southeastern-daily-performance/) and [dashboard](http://chrisroos.github.com/southeastern-daily-performance/dashboard.html) (of sorts).
|
6
|
+
|
1
7
|
## Installation
|
2
8
|
|
3
9
|
$ gem install southeastern-daily-performance -r http://gemcutter.org
|
@@ -13,7 +19,26 @@
|
|
13
19
|
$ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
|
14
20
|
$ sedpr-to-csv sedpr.html
|
15
21
|
|
16
|
-
|
17
22
|
### Implicitly download html and convert
|
18
23
|
|
19
|
-
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|
24
|
+
$ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
|
25
|
+
|
26
|
+
## Notes
|
27
|
+
|
28
|
+
### Combining all csv files into one big file
|
29
|
+
|
30
|
+
$ echo "Date,Problem,Scheduled departure time,Scheduled departure station,Scheduled arrival station,Affect on service" > combined.csv
|
31
|
+
$ cat /path/to/csv/files/*.csv >> combined.csv
|
32
|
+
|
33
|
+
### Combining all csv overview files into one file
|
34
|
+
|
35
|
+
$ echo "Date,Services scheduled,Services run,Services within 5 minutes of schedule" > combined.overview.csv
|
36
|
+
$ cat /path/to/csv/files/*.overview.csv >> combined.overview.csv
|
37
|
+
|
38
|
+
## TODO
|
39
|
+
|
40
|
+
* 2010-04-21 breaks the parser...
|
41
|
+
* Ensure that I'm generating valid CSV (specifically that I'm quoting columns that contain commas)
|
42
|
+
* I currently have an empty file for 2010-11-30.csv. Investigate the cause.
|
43
|
+
* Republish the gem
|
44
|
+
* I'm getting 0s for the parsed overview from 2010-11-30. Investigate the cause.
|
data/Rakefile
CHANGED
@@ -7,19 +7,6 @@ Rake::TestTask.new do |t|
|
|
7
7
|
t.verbose = true
|
8
8
|
end
|
9
9
|
|
10
|
-
require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
|
11
|
-
task 'convert' do
|
12
|
-
if data_dir = ENV['DATA_DIR']
|
13
|
-
Dir[File.join(data_dir, '*.html')].each do |html_file|
|
14
|
-
html = File.read(html_file)
|
15
|
-
puts DailyPerformanceReport.new(html).to_csv
|
16
|
-
end
|
17
|
-
else
|
18
|
-
puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
|
19
|
-
exit 1
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
10
|
require "rubygems"
|
24
11
|
require "rake/gempackagetask"
|
25
12
|
require "rake/rdoctask"
|
@@ -33,7 +20,7 @@ spec = Gem::Specification.new do |s|
|
|
33
20
|
|
34
21
|
# Change these as appropriate
|
35
22
|
s.name = "southeastern-daily-performance"
|
36
|
-
s.version = "0.0.
|
23
|
+
s.version = "0.0.5"
|
37
24
|
s.summary = "Converts Southeaster Daily Performance reports from HTML to CSV"
|
38
25
|
s.author = "Chris Roos"
|
39
26
|
s.email = "chris@seagul.co.uk"
|
@@ -53,7 +40,7 @@ spec = Gem::Specification.new do |s|
|
|
53
40
|
s.add_dependency('hpricot')
|
54
41
|
|
55
42
|
# If your tests use any gems, include them here
|
56
|
-
|
43
|
+
s.add_development_dependency("mocha") # for example
|
57
44
|
|
58
45
|
# If you want to publish automatically to rubyforge, you may need
|
59
46
|
# to tweak this, and the publishing task below too.
|
@@ -82,47 +69,4 @@ end
|
|
82
69
|
desc 'Clear out RDoc and generated packages'
|
83
70
|
task :clean => [:clobber_rdoc, :clobber_package] do
|
84
71
|
rm "#{spec.name}.gemspec"
|
85
|
-
end
|
86
|
-
|
87
|
-
# If you want to publish to RubyForge automatically, here's a simple
|
88
|
-
# task to help do that. If you don't, just get rid of this.
|
89
|
-
# Be sure to set up your Rubyforge account details with the Rubyforge
|
90
|
-
# gem; you'll need to run `rubyforge setup` and `rubyforge config` at
|
91
|
-
# the very least.
|
92
|
-
begin
|
93
|
-
require "rake/contrib/sshpublisher"
|
94
|
-
namespace :rubyforge do
|
95
|
-
|
96
|
-
desc "Release gem and RDoc documentation to RubyForge"
|
97
|
-
task :release => ["rubyforge:release:gem", "rubyforge:release:docs"]
|
98
|
-
|
99
|
-
namespace :release do
|
100
|
-
desc "Release a new version of this gem"
|
101
|
-
task :gem => [:package] do
|
102
|
-
require 'rubyforge'
|
103
|
-
rubyforge = RubyForge.new
|
104
|
-
rubyforge.configure
|
105
|
-
rubyforge.login
|
106
|
-
rubyforge.userconfig['release_notes'] = spec.summary
|
107
|
-
path_to_gem = File.join(File.dirname(__FILE__), "pkg", "#{spec.name}-#{spec.version}.gem")
|
108
|
-
puts "Publishing #{spec.name}-#{spec.version.to_s} to Rubyforge..."
|
109
|
-
rubyforge.add_release(spec.rubyforge_project, spec.name, spec.version.to_s, path_to_gem)
|
110
|
-
end
|
111
|
-
|
112
|
-
desc "Publish RDoc to RubyForge."
|
113
|
-
task :docs => [:rdoc] do
|
114
|
-
config = YAML.load(
|
115
|
-
File.read(File.expand_path('~/.rubyforge/user-config.yml'))
|
116
|
-
)
|
117
|
-
|
118
|
-
host = "#{config['username']}@rubyforge.org"
|
119
|
-
remote_dir = "/var/www/gforge-projects/southeastern-daily-performance/" # Should be the same as the rubyforge project name
|
120
|
-
local_dir = 'rdoc'
|
121
|
-
|
122
|
-
Rake::SshDirPublisher.new(host, remote_dir, local_dir).upload
|
123
|
-
end
|
124
|
-
end
|
125
|
-
end
|
126
|
-
rescue LoadError
|
127
|
-
puts "Rake SshDirPublisher is unavailable or your rubyforge environment is not configured."
|
128
|
-
end
|
72
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
FORMATS = ['full', 'overview']
|
4
|
+
|
5
|
+
def print_usage_instruction_and_exit
|
6
|
+
puts "Usage: #{File.basename(__FILE__)} [#{FORMATS.join('|')}] html-input-directory csv-output-directory"
|
7
|
+
exit 1
|
8
|
+
end
|
9
|
+
|
10
|
+
format, html_input_dir, csv_output_dir = ARGV
|
11
|
+
print_usage_instruction_and_exit unless format && FORMATS.include?(format)
|
12
|
+
print_usage_instruction_and_exit unless html_input_dir && File.directory?(html_input_dir)
|
13
|
+
print_usage_instruction_and_exit unless csv_output_dir && File.directory?(csv_output_dir)
|
14
|
+
|
15
|
+
# ROOT = File.expand_path('../../', __FILE__)
|
16
|
+
# LIB_DIR = File.join(ROOT, 'lib')
|
17
|
+
# CONVERTOR = File.join(ROOT, 'bin', 'sedpr-to-csv')
|
18
|
+
OPTIONAL_ARGS = (format == 'overview') ? 'overview' : ''
|
19
|
+
|
20
|
+
Dir[File.join(html_input_dir, '*.html')].each do |file|
|
21
|
+
p file
|
22
|
+
|
23
|
+
html_filename = File.basename(file)
|
24
|
+
csv_filename = html_filename.sub(/\.html/, '.csv')
|
25
|
+
csv_filename = csv_filename.sub(/\.csv/, '.overview.csv') if format == 'overview'
|
26
|
+
|
27
|
+
# cmd = "ruby -I#{LIB_DIR} #{CONVERTOR} #{file} #{OPTIONAL_ARGS} > #{File.join(csv_output_dir, csv_filename)}"
|
28
|
+
cmd = "sedpr-to-csv #{file} #{OPTIONAL_ARGS} > #{File.join(csv_output_dir, csv_filename)}"
|
29
|
+
`#{cmd}`
|
30
|
+
end
|
data/bin/sedpr-to-csv
CHANGED
@@ -1,12 +1,18 @@
|
|
1
1
|
#! /usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'open-uri'
|
4
|
-
require
|
4
|
+
require 'southeastern_daily_performance'
|
5
5
|
|
6
6
|
unless html_location = ARGV[0]
|
7
|
-
|
7
|
+
msg = []
|
8
|
+
msg << "Usage: sedpr-to-csv location-of-html [overview]"
|
9
|
+
msg << " * location-of-html - Can be a file or url."
|
10
|
+
msg << " * overview - Optional. Generate the overview data."
|
11
|
+
puts msg.join("\n")
|
8
12
|
exit 1
|
9
13
|
end
|
10
14
|
|
15
|
+
format = ARGV[1]
|
16
|
+
|
11
17
|
io = open(html_location)
|
12
|
-
puts SoutheasternDailyPerformance::DailyPerformanceReport.new(io.read).to_csv
|
18
|
+
puts SoutheasternDailyPerformance::DailyPerformanceReport.new(io.read).to_csv(format)
|
@@ -7,12 +7,13 @@ module SoutheasternDailyPerformance
|
|
7
7
|
|
8
8
|
def initialize(reason_for_disruption, incident_text)
|
9
9
|
@reason_for_disruption = reason_for_disruption
|
10
|
+
incident_text.gsub!(/–/, '-') # Replace ndash with normal dash
|
10
11
|
if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
|
11
12
|
@scheduled_start_time, @scheduled_start_station = $1, $2
|
12
13
|
destination_and_effect_on_service = $3
|
13
14
|
@scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
|
14
15
|
reasons = [
|
15
|
-
'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
|
16
|
+
'cancelled', 'started', 'delayed by', 'did not call', 'terminated( at)?', 'diverted', 'ran fast', 'called'
|
16
17
|
]
|
17
18
|
matches = reasons.collect do |reason|
|
18
19
|
destination_and_effect_on_service =~ /#{reason}/i
|
File without changes
|
@@ -22,7 +22,7 @@ module SoutheasternDailyPerformance
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def scheduled_services
|
25
|
-
report[/(\d+) train services were scheduled/, 1].to_i
|
25
|
+
report[/(\d+) (train )?services were scheduled/, 1].to_i
|
26
26
|
end
|
27
27
|
|
28
28
|
def actual_services
|
@@ -30,17 +30,21 @@ module SoutheasternDailyPerformance
|
|
30
30
|
end
|
31
31
|
|
32
32
|
def services_within_five_minutes_of_schedule
|
33
|
-
report[/(\d
|
33
|
+
report[/(\d+\.?\d+?)% of services ran within 5 minutes of schedule/, 1].to_f
|
34
34
|
end
|
35
35
|
|
36
36
|
def affected_services
|
37
37
|
AffectedServicesReport.new(report_container.inner_html).affected_services
|
38
38
|
end
|
39
39
|
|
40
|
-
def to_csv
|
41
|
-
|
42
|
-
CSV.generate_line [date,
|
43
|
-
|
40
|
+
def to_csv(type=nil)
|
41
|
+
if "#{type}" == 'overview'
|
42
|
+
CSV.generate_line [date, scheduled_services, actual_services, services_within_five_minutes_of_schedule]
|
43
|
+
else
|
44
|
+
affected_services.collect do |service|
|
45
|
+
CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
|
46
|
+
end.join("\n")
|
47
|
+
end
|
44
48
|
end
|
45
49
|
|
46
50
|
private
|
@@ -50,7 +54,11 @@ module SoutheasternDailyPerformance
|
|
50
54
|
end
|
51
55
|
|
52
56
|
def report_container
|
53
|
-
(@doc/'
|
57
|
+
if (@doc/'#mainblock').any?
|
58
|
+
(@doc/'h1').first.next_sibling
|
59
|
+
else
|
60
|
+
(@doc/'h1').first.parent.next_sibling
|
61
|
+
end
|
54
62
|
end
|
55
63
|
|
56
64
|
end
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: southeastern-daily-performance
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 21
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 0
|
8
|
-
-
|
9
|
-
version: 0.0.
|
9
|
+
- 5
|
10
|
+
version: 0.0.5
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Chris Roos
|
@@ -14,24 +15,41 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2010-
|
18
|
+
date: 2010-12-22 00:00:00 +00:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
21
22
|
name: hpricot
|
22
23
|
prerelease: false
|
23
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
24
26
|
requirements:
|
25
27
|
- - ">="
|
26
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
27
30
|
segments:
|
28
31
|
- 0
|
29
32
|
version: "0"
|
30
33
|
type: :runtime
|
31
34
|
version_requirements: *id001
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: mocha
|
37
|
+
prerelease: false
|
38
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
hash: 3
|
44
|
+
segments:
|
45
|
+
- 0
|
46
|
+
version: "0"
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
32
49
|
description:
|
33
50
|
email: chris@seagul.co.uk
|
34
51
|
executables:
|
52
|
+
- convert-all-html-data
|
35
53
|
- sedpr-to-csv
|
36
54
|
extensions: []
|
37
55
|
|
@@ -40,10 +58,12 @@ extra_rdoc_files:
|
|
40
58
|
files:
|
41
59
|
- README.md
|
42
60
|
- Rakefile
|
43
|
-
- lib/
|
44
|
-
- lib/
|
45
|
-
- lib/
|
46
|
-
- lib/
|
61
|
+
- lib/southeastern_daily_performance/affected_service.rb
|
62
|
+
- lib/southeastern_daily_performance/affected_services_report.rb
|
63
|
+
- lib/southeastern_daily_performance/daily_performance_report.rb
|
64
|
+
- lib/southeastern_daily_performance.rb
|
65
|
+
- bin/convert-all-html-data
|
66
|
+
- bin/sedpr-to-csv
|
47
67
|
has_rdoc: true
|
48
68
|
homepage: http://github.com/chrisroos/southeastern-daily-performance
|
49
69
|
licenses: []
|
@@ -55,23 +75,27 @@ rdoc_options:
|
|
55
75
|
require_paths:
|
56
76
|
- lib
|
57
77
|
required_ruby_version: !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
58
79
|
requirements:
|
59
80
|
- - ">="
|
60
81
|
- !ruby/object:Gem::Version
|
82
|
+
hash: 3
|
61
83
|
segments:
|
62
84
|
- 0
|
63
85
|
version: "0"
|
64
86
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
65
88
|
requirements:
|
66
89
|
- - ">="
|
67
90
|
- !ruby/object:Gem::Version
|
91
|
+
hash: 3
|
68
92
|
segments:
|
69
93
|
- 0
|
70
94
|
version: "0"
|
71
95
|
requirements: []
|
72
96
|
|
73
97
|
rubyforge_project: southeastern-daily-performance
|
74
|
-
rubygems_version: 1.3.
|
98
|
+
rubygems_version: 1.3.7
|
75
99
|
signing_key:
|
76
100
|
specification_version: 3
|
77
101
|
summary: Converts Southeaster Daily Performance reports from HTML to CSV
|
data/lib/sedpr.rb
DELETED