southeastern-daily-performance 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
@@ -0,0 +1,19 @@
1
+ ## Installation
2
+
3
+ $ gem install southeastern-daily-performance -r http://gemcutter.org
4
+
5
+ ## Usage
6
+
7
+ $ sedpr-to-csv <location-of-html>
8
+
9
+ ## Examples
10
+
11
+ ### Explicitly download html and convert local file
12
+
13
+ $ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
14
+ $ sedpr-to-csv sedpr.html
15
+
16
+
17
+ ### Implicitly download html and convert
18
+
19
+ $ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
data/Rakefile CHANGED
@@ -1,24 +1,24 @@
1
1
  task :default => :test
2
2
 
3
- # require "rake/testtask"
4
- # Rake::TestTask.new do |t|
5
- # t.libs << "test"
6
- # t.test_files = FileList["test/**/*_test.rb"]
7
- # t.verbose = true
8
- # end
9
- #
10
- # require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
11
- # task 'convert' do
12
- # if data_dir = ENV['DATA_DIR']
13
- # Dir[File.join(data_dir, '*.html')].each do |html_file|
14
- # html = File.read(html_file)
15
- # puts DailyPerformanceReport.new(html).to_csv
16
- # end
17
- # else
18
- # puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
19
- # exit 1
20
- # end
21
- # end
3
+ require "rake/testtask"
4
+ Rake::TestTask.new do |t|
5
+ t.libs << "test"
6
+ t.test_files = FileList["test/**/*_test.rb"]
7
+ t.verbose = true
8
+ end
9
+
10
+ require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
11
+ task 'convert' do
12
+ if data_dir = ENV['DATA_DIR']
13
+ Dir[File.join(data_dir, '*.html')].each do |html_file|
14
+ html = File.read(html_file)
15
+ puts DailyPerformanceReport.new(html).to_csv
16
+ end
17
+ else
18
+ puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
19
+ exit 1
20
+ end
21
+ end
22
22
 
23
23
  require "rubygems"
24
24
  require "rake/gempackagetask"
@@ -33,24 +33,24 @@ spec = Gem::Specification.new do |s|
33
33
 
34
34
  # Change these as appropriate
35
35
  s.name = "southeastern-daily-performance"
36
- s.version = "0.0.2"
36
+ s.version = "0.0.3"
37
37
  s.summary = "Converts Southeaster Daily Performance reports from HTML to CSV"
38
38
  s.author = "Chris Roos"
39
39
  s.email = "chris@seagul.co.uk"
40
40
  s.homepage = "http://github.com/chrisroos/southeastern-daily-performance"
41
41
 
42
42
  s.has_rdoc = true
43
- s.extra_rdoc_files = %w(README)
44
- s.rdoc_options = %w(--main README)
43
+ s.extra_rdoc_files = %w(README.md)
44
+ s.rdoc_options = %w(--main README.md)
45
45
 
46
46
  # Add any extra files to include in the gem
47
- s.files = %w(README Rakefile) + Dir.glob("{bin,test,lib/**/*}")
47
+ s.files = %w(README.md Rakefile) + Dir.glob("{bin,test,lib/**/*}")
48
48
  s.executables = FileList["bin/**"].map { |f| File.basename(f) }
49
49
  s.require_paths = ["lib"]
50
50
 
51
51
  # If you want to depend on other gems, add them here, along with any
52
52
  # relevant versions
53
- # s.add_dependency("some_other_gem", "~> 0.1.0")
53
+ s.add_dependency('hpricot')
54
54
 
55
55
  # If your tests use any gems, include them here
56
56
  # s.add_development_dependency("mocha") # for example
data/bin/sedpr-to-csv CHANGED
@@ -8,5 +8,5 @@ unless html_location = ARGV[0]
8
8
  exit 1
9
9
  end
10
10
 
11
- html = open(html_location)
12
- puts DailyPerformanceReport.new(html).to_csv
11
+ io = open(html_location)
12
+ puts SoutheasternDailyPerformance::DailyPerformanceReport.new(io.read).to_csv
data/lib/sedpr.rb CHANGED
@@ -1,3 +1,3 @@
1
- $: << File.join(File.dirname(__FILE__), 'sedpr')
2
-
3
- require 'daily_performance_report'
1
+ require File.expand_path '../sedpr/affected_service', __FILE__
2
+ require File.expand_path '../sedpr/affected_services_report', __FILE__
3
+ require File.expand_path '../sedpr/daily_performance_report', __FILE__
@@ -1,41 +1,43 @@
1
- class AffectedService
1
+ module SoutheasternDailyPerformance
2
+ class AffectedService
2
3
 
3
- attr_reader :reason_for_disruption
4
- attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
5
- attr_reader :effect_on_service
4
+ attr_reader :reason_for_disruption
5
+ attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
6
+ attr_reader :effect_on_service
6
7
 
7
- def initialize(reason_for_disruption, incident_text)
8
- @reason_for_disruption = reason_for_disruption
9
- if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
10
- @scheduled_start_time, @scheduled_start_station = $1, $2
11
- destination_and_effect_on_service = $3
12
- @scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
13
- reasons = [
14
- 'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
15
- ]
16
- matches = reasons.collect do |reason|
17
- destination_and_effect_on_service =~ /#{reason}/i
18
- end
19
- if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
20
- destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
21
- else
22
- unless destination_and_effect_on_service.split(' ').length == 1
23
- warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
8
+ def initialize(reason_for_disruption, incident_text)
9
+ @reason_for_disruption = reason_for_disruption
10
+ if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
11
+ @scheduled_start_time, @scheduled_start_station = $1, $2
12
+ destination_and_effect_on_service = $3
13
+ @scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
14
+ reasons = [
15
+ 'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
16
+ ]
17
+ matches = reasons.collect do |reason|
18
+ destination_and_effect_on_service =~ /#{reason}/i
24
19
  end
25
- destination_and_effect_on_service =~ /(.*)/
20
+ if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
21
+ destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
22
+ else
23
+ unless destination_and_effect_on_service.split(' ').length == 1
24
+ warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
25
+ end
26
+ destination_and_effect_on_service =~ /(.*)/
27
+ end
28
+ @scheduled_destination_station, @effect_on_service = $1, ($2||'')
29
+ else
30
+ warn "Warning. Cannot parse service details: '#{incident_text}'"
26
31
  end
27
- @scheduled_destination_station, @effect_on_service = $1, ($2||'')
28
- else
29
- warn "Warning. Cannot parse service details: '#{incident_text}'"
30
32
  end
31
- end
32
33
 
33
- def ==(incident)
34
- self.reason_for_disruption == incident.reason_for_disruption and
35
- self.scheduled_start_time == incident.scheduled_start_time and
36
- self.scheduled_start_station == incident.scheduled_start_station and
37
- self.scheduled_destination_station == incident.scheduled_destination_station and
38
- self.effect_on_service == incident.effect_on_service
39
- end
34
+ def ==(incident)
35
+ self.reason_for_disruption == incident.reason_for_disruption and
36
+ self.scheduled_start_time == incident.scheduled_start_time and
37
+ self.scheduled_start_station == incident.scheduled_start_station and
38
+ self.scheduled_destination_station == incident.scheduled_destination_station and
39
+ self.effect_on_service == incident.effect_on_service
40
+ end
40
41
 
42
+ end
41
43
  end
@@ -1,28 +1,28 @@
1
- require 'rubygems'
2
1
  require 'hpricot'
3
- require 'affected_service'
4
2
 
5
- class AffectedServicesReport
3
+ module SoutheasternDailyPerformance
4
+ class AffectedServicesReport
6
5
 
7
- def initialize(html)
8
- html.gsub!(/&nbsp;/, ' ')
9
- @doc = Hpricot(html)
10
- end
6
+ def initialize(html)
7
+ html.gsub!(/&nbsp;/, ' ')
8
+ @doc = Hpricot(html)
9
+ end
11
10
 
12
- def affected_services
13
- @doc.search('*').collect do |elem|
14
- if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
15
- reason = find_previous_strong_element(elem).inner_text
16
- AffectedService.new(reason.strip, incident_text.strip)
17
- end
18
- end.compact
19
- end
11
+ def affected_services
12
+ @doc.search('*').collect do |elem|
13
+ if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
14
+ reason = find_previous_strong_element(elem).inner_text
15
+ AffectedService.new(reason.strip, incident_text.strip)
16
+ end
17
+ end.compact
18
+ end
20
19
 
21
- private
20
+ private
22
21
 
23
- def find_previous_strong_element(elem)
24
- return elem if (elem.respond_to?(:name) and elem.name == 'strong')
25
- find_previous_strong_element(elem.previous)
26
- end
22
+ def find_previous_strong_element(elem)
23
+ return elem if (elem.respond_to?(:name) and elem.name == 'strong')
24
+ find_previous_strong_element(elem.previous)
25
+ end
27
26
 
27
+ end
28
28
  end
@@ -1,55 +1,57 @@
1
- require 'rubygems'
2
1
  require 'hpricot'
3
- require 'affected_services_report'
4
2
  require 'csv'
3
+ require 'date'
5
4
 
6
- class DailyPerformanceReport
5
+ module SoutheasternDailyPerformance
6
+ class DailyPerformanceReport
7
7
 
8
- def initialize(html)
9
- @doc = Hpricot(html)
10
- end
8
+ def initialize(html)
9
+ html = html.gsub(%r!&nbsp;!, ' ')
10
+ @doc = Hpricot(html)
11
+ end
11
12
 
12
- def date
13
- date = (@doc/'h1').inner_text
14
- date.gsub!(/\?/, ' ')
15
- if date =~ /(.+?) (\d+) (.+)/
16
- elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
13
+ def date
14
+ date = (@doc/'h1').inner_text
15
+ date.gsub!(/\?/, ' ')
16
+ if date =~ /(.+?) (\d+) (.+)/
17
+ elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
18
+ end
19
+ day_name, day, month_name = $1, $2, $3
20
+ date = [day, month_name[0..2].downcase, '2010'].join('-')
21
+ Date.parse(date)
17
22
  end
18
- day_name, day, month_name = $1, $2, $3
19
- date = [day, month_name[0..2].downcase, '2010'].join('-')
20
- Date.parse(date)
21
- end
22
23
 
23
- def scheduled_services
24
- report[/(\d+) train services were scheduled/, 1].to_i
25
- end
24
+ def scheduled_services
25
+ report[/(\d+) train services were scheduled/, 1].to_i
26
+ end
26
27
 
27
- def actual_services
28
- report[/of which (\d+) ran/, 1].to_i
29
- end
28
+ def actual_services
29
+ report[/of which (\d+) ran/, 1].to_i
30
+ end
30
31
 
31
- def services_within_five_minutes_of_schedule
32
- report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
33
- end
32
+ def services_within_five_minutes_of_schedule
33
+ report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
34
+ end
34
35
 
35
- def affected_services
36
- AffectedServicesReport.new(report_container.inner_html).affected_services
37
- end
36
+ def affected_services
37
+ AffectedServicesReport.new(report_container.inner_html).affected_services
38
+ end
38
39
 
39
- def to_csv
40
- affected_services.collect do |service|
41
- CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
42
- end.join("\n")
43
- end
40
+ def to_csv
41
+ affected_services.collect do |service|
42
+ CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
43
+ end.join("\n")
44
+ end
44
45
 
45
- private
46
+ private
46
47
 
47
- def report
48
- report_container.inner_text
49
- end
48
+ def report
49
+ report_container.inner_text
50
+ end
50
51
 
51
- def report_container
52
- (@doc/'h1').first.parent.next_sibling
53
- end
52
+ def report_container
53
+ (@doc/'h1').first.parent.next_sibling
54
+ end
54
55
 
56
+ end
55
57
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: southeastern-daily-performance
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 3
9
+ version: 0.0.3
5
10
  platform: ruby
6
11
  authors:
7
12
  - Chris Roos
@@ -9,10 +14,21 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-02-14 00:00:00 +00:00
17
+ date: 2010-10-09 00:00:00 +01:00
13
18
  default_executable:
14
- dependencies: []
15
-
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: hpricot
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ version_requirements: *id001
16
32
  description:
17
33
  email: chris@seagul.co.uk
18
34
  executables:
@@ -20,9 +36,9 @@ executables:
20
36
  extensions: []
21
37
 
22
38
  extra_rdoc_files:
23
- - README
39
+ - README.md
24
40
  files:
25
- - README
41
+ - README.md
26
42
  - Rakefile
27
43
  - lib/sedpr/affected_service.rb
28
44
  - lib/sedpr/affected_services_report.rb
@@ -35,25 +51,27 @@ licenses: []
35
51
  post_install_message:
36
52
  rdoc_options:
37
53
  - --main
38
- - README
54
+ - README.md
39
55
  require_paths:
40
56
  - lib
41
57
  required_ruby_version: !ruby/object:Gem::Requirement
42
58
  requirements:
43
59
  - - ">="
44
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
45
63
  version: "0"
46
- version:
47
64
  required_rubygems_version: !ruby/object:Gem::Requirement
48
65
  requirements:
49
66
  - - ">="
50
67
  - !ruby/object:Gem::Version
68
+ segments:
69
+ - 0
51
70
  version: "0"
52
- version:
53
71
  requirements: []
54
72
 
55
73
  rubyforge_project: southeastern-daily-performance
56
- rubygems_version: 1.3.5
74
+ rubygems_version: 1.3.6
57
75
  signing_key:
58
76
  specification_version: 3
59
77
  summary: Converts Southeaster Daily Performance reports from HTML to CSV
data/README DELETED
@@ -1,18 +0,0 @@
1
- Installation
2
- ============
3
- $ gem install southeastern-daily-performance -r http://gemcutter.org
4
-
5
-
6
- Usage
7
- =====
8
- $ sedpr-to-csv <location-of-html>
9
-
10
-
11
- Examples
12
- ========
13
- # Explicitly download html and convert local file
14
- $ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
15
- $ sedpr-to-csv sedpr.html
16
-
17
- # Implicitly download html and convert
18
- $ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132