southeastern-daily-performance 0.0.2 → 0.0.3

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.md ADDED
@@ -0,0 +1,19 @@
1
+ ## Installation
2
+
3
+ $ gem install southeastern-daily-performance -r http://gemcutter.org
4
+
5
+ ## Usage
6
+
7
+ $ sedpr-to-csv <location-of-html>
8
+
9
+ ## Examples
10
+
11
+ ### Explicitly download html and convert local file
12
+
13
+ $ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
14
+ $ sedpr-to-csv sedpr.html
15
+
16
+
17
+ ### Implicitly download html and convert
18
+
19
+ $ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132
data/Rakefile CHANGED
@@ -1,24 +1,24 @@
1
1
  task :default => :test
2
2
 
3
- # require "rake/testtask"
4
- # Rake::TestTask.new do |t|
5
- # t.libs << "test"
6
- # t.test_files = FileList["test/**/*_test.rb"]
7
- # t.verbose = true
8
- # end
9
- #
10
- # require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
11
- # task 'convert' do
12
- # if data_dir = ENV['DATA_DIR']
13
- # Dir[File.join(data_dir, '*.html')].each do |html_file|
14
- # html = File.read(html_file)
15
- # puts DailyPerformanceReport.new(html).to_csv
16
- # end
17
- # else
18
- # puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
19
- # exit 1
20
- # end
21
- # end
3
+ require "rake/testtask"
4
+ Rake::TestTask.new do |t|
5
+ t.libs << "test"
6
+ t.test_files = FileList["test/**/*_test.rb"]
7
+ t.verbose = true
8
+ end
9
+
10
+ require File.join(File.dirname(__FILE__), 'lib', 'sedpr')
11
+ task 'convert' do
12
+ if data_dir = ENV['DATA_DIR']
13
+ Dir[File.join(data_dir, '*.html')].each do |html_file|
14
+ html = File.read(html_file)
15
+ puts DailyPerformanceReport.new(html).to_csv
16
+ end
17
+ else
18
+ puts "Usage: DATA_DIR=/path/to/html-reports rake convert"
19
+ exit 1
20
+ end
21
+ end
22
22
 
23
23
  require "rubygems"
24
24
  require "rake/gempackagetask"
@@ -33,24 +33,24 @@ spec = Gem::Specification.new do |s|
33
33
 
34
34
  # Change these as appropriate
35
35
  s.name = "southeastern-daily-performance"
36
- s.version = "0.0.2"
36
+ s.version = "0.0.3"
37
37
  s.summary = "Converts Southeaster Daily Performance reports from HTML to CSV"
38
38
  s.author = "Chris Roos"
39
39
  s.email = "chris@seagul.co.uk"
40
40
  s.homepage = "http://github.com/chrisroos/southeastern-daily-performance"
41
41
 
42
42
  s.has_rdoc = true
43
- s.extra_rdoc_files = %w(README)
44
- s.rdoc_options = %w(--main README)
43
+ s.extra_rdoc_files = %w(README.md)
44
+ s.rdoc_options = %w(--main README.md)
45
45
 
46
46
  # Add any extra files to include in the gem
47
- s.files = %w(README Rakefile) + Dir.glob("{bin,test,lib/**/*}")
47
+ s.files = %w(README.md Rakefile) + Dir.glob("{bin,test,lib/**/*}")
48
48
  s.executables = FileList["bin/**"].map { |f| File.basename(f) }
49
49
  s.require_paths = ["lib"]
50
50
 
51
51
  # If you want to depend on other gems, add them here, along with any
52
52
  # relevant versions
53
- # s.add_dependency("some_other_gem", "~> 0.1.0")
53
+ s.add_dependency('hpricot')
54
54
 
55
55
  # If your tests use any gems, include them here
56
56
  # s.add_development_dependency("mocha") # for example
data/bin/sedpr-to-csv CHANGED
@@ -8,5 +8,5 @@ unless html_location = ARGV[0]
8
8
  exit 1
9
9
  end
10
10
 
11
- html = open(html_location)
12
- puts DailyPerformanceReport.new(html).to_csv
11
+ io = open(html_location)
12
+ puts SoutheasternDailyPerformance::DailyPerformanceReport.new(io.read).to_csv
data/lib/sedpr.rb CHANGED
@@ -1,3 +1,3 @@
1
- $: << File.join(File.dirname(__FILE__), 'sedpr')
2
-
3
- require 'daily_performance_report'
1
+ require File.expand_path '../sedpr/affected_service', __FILE__
2
+ require File.expand_path '../sedpr/affected_services_report', __FILE__
3
+ require File.expand_path '../sedpr/daily_performance_report', __FILE__
@@ -1,41 +1,43 @@
1
- class AffectedService
1
+ module SoutheasternDailyPerformance
2
+ class AffectedService
2
3
 
3
- attr_reader :reason_for_disruption
4
- attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
5
- attr_reader :effect_on_service
4
+ attr_reader :reason_for_disruption
5
+ attr_reader :scheduled_start_time, :scheduled_start_station, :scheduled_destination_station
6
+ attr_reader :effect_on_service
6
7
 
7
- def initialize(reason_for_disruption, incident_text)
8
- @reason_for_disruption = reason_for_disruption
9
- if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
10
- @scheduled_start_time, @scheduled_start_station = $1, $2
11
- destination_and_effect_on_service = $3
12
- @scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
13
- reasons = [
14
- 'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
15
- ]
16
- matches = reasons.collect do |reason|
17
- destination_and_effect_on_service =~ /#{reason}/i
18
- end
19
- if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
20
- destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
21
- else
22
- unless destination_and_effect_on_service.split(' ').length == 1
23
- warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
8
+ def initialize(reason_for_disruption, incident_text)
9
+ @reason_for_disruption = reason_for_disruption
10
+ if incident_text =~ /(\d\d:\d\d) (.*?) ?(?:-|to|\?) (.*)/
11
+ @scheduled_start_time, @scheduled_start_station = $1, $2
12
+ destination_and_effect_on_service = $3
13
+ @scheduled_start_station.gsub!(/[^a-zA-Z ]/, '')
14
+ reasons = [
15
+ 'cancelled', 'started', 'delayed by', 'did not call', 'terminated at', 'diverted'
16
+ ]
17
+ matches = reasons.collect do |reason|
18
+ destination_and_effect_on_service =~ /#{reason}/i
24
19
  end
25
- destination_and_effect_on_service =~ /(.*)/
20
+ if matches.compact.min && reason = reasons[matches.index(matches.compact.min)]
21
+ destination_and_effect_on_service =~ /(.*) (#{reason}.*)/i
22
+ else
23
+ unless destination_and_effect_on_service.split(' ').length == 1
24
+ warn "Warning. Unknown, or missing, affect on service: '#{incident_text}'"
25
+ end
26
+ destination_and_effect_on_service =~ /(.*)/
27
+ end
28
+ @scheduled_destination_station, @effect_on_service = $1, ($2||'')
29
+ else
30
+ warn "Warning. Cannot parse service details: '#{incident_text}'"
26
31
  end
27
- @scheduled_destination_station, @effect_on_service = $1, ($2||'')
28
- else
29
- warn "Warning. Cannot parse service details: '#{incident_text}'"
30
32
  end
31
- end
32
33
 
33
- def ==(incident)
34
- self.reason_for_disruption == incident.reason_for_disruption and
35
- self.scheduled_start_time == incident.scheduled_start_time and
36
- self.scheduled_start_station == incident.scheduled_start_station and
37
- self.scheduled_destination_station == incident.scheduled_destination_station and
38
- self.effect_on_service == incident.effect_on_service
39
- end
34
+ def ==(incident)
35
+ self.reason_for_disruption == incident.reason_for_disruption and
36
+ self.scheduled_start_time == incident.scheduled_start_time and
37
+ self.scheduled_start_station == incident.scheduled_start_station and
38
+ self.scheduled_destination_station == incident.scheduled_destination_station and
39
+ self.effect_on_service == incident.effect_on_service
40
+ end
40
41
 
42
+ end
41
43
  end
@@ -1,28 +1,28 @@
1
- require 'rubygems'
2
1
  require 'hpricot'
3
- require 'affected_service'
4
2
 
5
- class AffectedServicesReport
3
+ module SoutheasternDailyPerformance
4
+ class AffectedServicesReport
6
5
 
7
- def initialize(html)
8
- html.gsub!(/&nbsp;/, ' ')
9
- @doc = Hpricot(html)
10
- end
6
+ def initialize(html)
7
+ html.gsub!(/&nbsp;/, ' ')
8
+ @doc = Hpricot(html)
9
+ end
11
10
 
12
- def affected_services
13
- @doc.search('*').collect do |elem|
14
- if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
15
- reason = find_previous_strong_element(elem).inner_text
16
- AffectedService.new(reason.strip, incident_text.strip)
17
- end
18
- end.compact
19
- end
11
+ def affected_services
12
+ @doc.search('*').collect do |elem|
13
+ if elem.text? and incident_text = elem.inner_text[/\d\d:\d\d.*/]
14
+ reason = find_previous_strong_element(elem).inner_text
15
+ AffectedService.new(reason.strip, incident_text.strip)
16
+ end
17
+ end.compact
18
+ end
20
19
 
21
- private
20
+ private
22
21
 
23
- def find_previous_strong_element(elem)
24
- return elem if (elem.respond_to?(:name) and elem.name == 'strong')
25
- find_previous_strong_element(elem.previous)
26
- end
22
+ def find_previous_strong_element(elem)
23
+ return elem if (elem.respond_to?(:name) and elem.name == 'strong')
24
+ find_previous_strong_element(elem.previous)
25
+ end
27
26
 
27
+ end
28
28
  end
@@ -1,55 +1,57 @@
1
- require 'rubygems'
2
1
  require 'hpricot'
3
- require 'affected_services_report'
4
2
  require 'csv'
3
+ require 'date'
5
4
 
6
- class DailyPerformanceReport
5
+ module SoutheasternDailyPerformance
6
+ class DailyPerformanceReport
7
7
 
8
- def initialize(html)
9
- @doc = Hpricot(html)
10
- end
8
+ def initialize(html)
9
+ html = html.gsub(%r!&nbsp;!, ' ')
10
+ @doc = Hpricot(html)
11
+ end
11
12
 
12
- def date
13
- date = (@doc/'h1').inner_text
14
- date.gsub!(/\?/, ' ')
15
- if date =~ /(.+?) (\d+) (.+)/
16
- elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
13
+ def date
14
+ date = (@doc/'h1').inner_text
15
+ date.gsub!(/\?/, ' ')
16
+ if date =~ /(.+?) (\d+) (.+)/
17
+ elsif (@doc/'h2').inner_text =~ /(.+?) (\d+) (.+)/
18
+ end
19
+ day_name, day, month_name = $1, $2, $3
20
+ date = [day, month_name[0..2].downcase, '2010'].join('-')
21
+ Date.parse(date)
17
22
  end
18
- day_name, day, month_name = $1, $2, $3
19
- date = [day, month_name[0..2].downcase, '2010'].join('-')
20
- Date.parse(date)
21
- end
22
23
 
23
- def scheduled_services
24
- report[/(\d+) train services were scheduled/, 1].to_i
25
- end
24
+ def scheduled_services
25
+ report[/(\d+) train services were scheduled/, 1].to_i
26
+ end
26
27
 
27
- def actual_services
28
- report[/of which (\d+) ran/, 1].to_i
29
- end
28
+ def actual_services
29
+ report[/of which (\d+) ran/, 1].to_i
30
+ end
30
31
 
31
- def services_within_five_minutes_of_schedule
32
- report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
33
- end
32
+ def services_within_five_minutes_of_schedule
33
+ report[/(\d+)% of services ran within 5 minutes of schedule/, 1].to_i
34
+ end
34
35
 
35
- def affected_services
36
- AffectedServicesReport.new(report_container.inner_html).affected_services
37
- end
36
+ def affected_services
37
+ AffectedServicesReport.new(report_container.inner_html).affected_services
38
+ end
38
39
 
39
- def to_csv
40
- affected_services.collect do |service|
41
- CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
42
- end.join("\n")
43
- end
40
+ def to_csv
41
+ affected_services.collect do |service|
42
+ CSV.generate_line [date, service.reason_for_disruption, service.scheduled_start_time, service.scheduled_start_station, service.scheduled_destination_station, service.effect_on_service]
43
+ end.join("\n")
44
+ end
44
45
 
45
- private
46
+ private
46
47
 
47
- def report
48
- report_container.inner_text
49
- end
48
+ def report
49
+ report_container.inner_text
50
+ end
50
51
 
51
- def report_container
52
- (@doc/'h1').first.parent.next_sibling
53
- end
52
+ def report_container
53
+ (@doc/'h1').first.parent.next_sibling
54
+ end
54
55
 
56
+ end
55
57
  end
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: southeastern-daily-performance
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 3
9
+ version: 0.0.3
5
10
  platform: ruby
6
11
  authors:
7
12
  - Chris Roos
@@ -9,10 +14,21 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-02-14 00:00:00 +00:00
17
+ date: 2010-10-09 00:00:00 +01:00
13
18
  default_executable:
14
- dependencies: []
15
-
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: hpricot
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 0
29
+ version: "0"
30
+ type: :runtime
31
+ version_requirements: *id001
16
32
  description:
17
33
  email: chris@seagul.co.uk
18
34
  executables:
@@ -20,9 +36,9 @@ executables:
20
36
  extensions: []
21
37
 
22
38
  extra_rdoc_files:
23
- - README
39
+ - README.md
24
40
  files:
25
- - README
41
+ - README.md
26
42
  - Rakefile
27
43
  - lib/sedpr/affected_service.rb
28
44
  - lib/sedpr/affected_services_report.rb
@@ -35,25 +51,27 @@ licenses: []
35
51
  post_install_message:
36
52
  rdoc_options:
37
53
  - --main
38
- - README
54
+ - README.md
39
55
  require_paths:
40
56
  - lib
41
57
  required_ruby_version: !ruby/object:Gem::Requirement
42
58
  requirements:
43
59
  - - ">="
44
60
  - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
45
63
  version: "0"
46
- version:
47
64
  required_rubygems_version: !ruby/object:Gem::Requirement
48
65
  requirements:
49
66
  - - ">="
50
67
  - !ruby/object:Gem::Version
68
+ segments:
69
+ - 0
51
70
  version: "0"
52
- version:
53
71
  requirements: []
54
72
 
55
73
  rubyforge_project: southeastern-daily-performance
56
- rubygems_version: 1.3.5
74
+ rubygems_version: 1.3.6
57
75
  signing_key:
58
76
  specification_version: 3
59
77
  summary: Converts Southeaster Daily Performance reports from HTML to CSV
data/README DELETED
@@ -1,18 +0,0 @@
1
- Installation
2
- ============
3
- $ gem install southeastern-daily-performance -r http://gemcutter.org
4
-
5
-
6
- Usage
7
- =====
8
- $ sedpr-to-csv <location-of-html>
9
-
10
-
11
- Examples
12
- ========
13
- # Explicitly download html and convert local file
14
- $ curl "http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132" > sedpr.html
15
- $ sedpr-to-csv sedpr.html
16
-
17
- # Implicitly download html and convert
18
- $ sedpr-to-csv http://www.southeasternrailway.co.uk/index.php/cms/pages/view/132