sps_bill 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,102 @@
1
module SpsBill
  # SpsBill::BillCollection is an Array-like class that represents a collection
  # of SP Services PDF bills.
  #
  # The <tt>load</tt> method is used to initialise the collection given a path specification.
  #
  # A range of collection methods are provided to extract sets of data
  # from the entire collection (e.g. <tt>electricity_usages</tt>). Each of them
  # returns an Array of rows (each row itself an Array); the matching column
  # names are available from <tt>headers</tt>.
  class BillCollection < Array

    class << self

      # Returns a BillCollection of Bill objects for PDF files matching +path_spec+.
      # +path_spec+ may be either:
      # - an array of filenames e.g. ['data/file1.pdf','file2.pdf'] (used as given, no globbing)
      # - or a single file or path spec e.g. './somepath/file1.pdf' or './somepath/*.pdf'
      def load(path_spec)
        filenames = path_spec.is_a?(Array) ? path_spec : Dir[path_spec]
        filenames.each_with_object(new) do |filename, collection|
          collection << SpsBill::Bill.new(filename)
        end
      end

    end

    # Returns the column names for the rows produced by +dataset_selector+
    # (the name of one of the dataset methods below, as a symbol).
    # Returns nil for an unrecognised selector.
    def headers(dataset_selector)
      case dataset_selector
      when :total_amounts
        ['invoice_month','amount']
      when :electricity_usages
        ['invoice_month','kwh','rate','amount']
      when :gas_usages
        ['invoice_month','kwh','rate','amount']
      when :water_usages
        ['invoice_month','cubic_m','rate','amount']
      when :all_data
        ['invoice_month','measure','kwh','cubic_m','rate','amount']
      end
    end

    # Returns an array of rows combining every dataset:
    #   [[month,measure,kwh,cubic_m,rate,amount]]
    # measure: total_charges,electricity,gas,water
    def all_data
      total_amounts(:all) + electricity_usages(:all) + gas_usages(:all) + water_usages(:all)
    end

    # Returns an array of total bill amounts by month
    #   [[month,amount]]
    # Any +style+ other than :solo pads each row out to the +all_data+ layout.
    def total_amounts(style=:solo)
      each_with_object([]) do |bill, rows|
        month = bill.invoice_month.to_s
        rows << if style == :solo
          [month, bill.total_amount]
        else
          [month, 'total_charges', nil, nil, nil, bill.total_amount]
        end
      end
    end

    # Returns an array of electricity usage charges by month
    #   [[month,kwh,rate,amount]]
    # Any +style+ other than :solo pads each row out to the +all_data+ layout.
    def electricity_usages(style=:solo)
      usage_rows(:electricity_usage, 'electricity', :kwh, style)
    end

    # Returns an array of gas usage charges by month
    #   [[month,kwh,rate,amount]]
    # Any +style+ other than :solo pads each row out to the +all_data+ layout.
    def gas_usages(style=:solo)
      usage_rows(:gas_usage, 'gas', :kwh, style)
    end

    # Returns an array of water usage charges by month
    #   [[month,cubic_m,rate,amount]]
    # Any +style+ other than :solo pads each row out to the +all_data+ layout.
    def water_usages(style=:solo)
      usage_rows(:water_usage, 'water', :cubic_m, style)
    end

    private

    # Builds the rows for one metered service across every bill.
    # +usage_reader+ is the bill method returning the charges, +measure+ the label
    # used in the combined layout and +quantity_key+ the charge key holding the
    # metered quantity (:kwh or :cubic_m).
    def usage_rows(usage_reader, measure, quantity_key, style)
      each_with_object([]) do |bill, rows|
        # Array() tolerates a bill without this service: the parser leaves the
        # usage as nil when the section heading is not found.
        # NOTE(review): assumes Bill exposes the parsed value unchanged - confirm.
        Array(bill.public_send(usage_reader)).each do |usage|
          month = bill.invoice_month.to_s
          quantity = usage[quantity_key]
          rows << if style == :solo
            [month, quantity, usage[:rate], usage[:amount]]
          elsif quantity_key == :cubic_m
            # combined layout keeps kwh and cubic_m in separate columns
            [month, measure, nil, quantity, usage[:rate], usage[:amount]]
          else
            [month, measure, quantity, nil, usage[:rate], usage[:amount]]
          end
        end
      end
    end

  end
end
@@ -0,0 +1,92 @@
1
+ require 'date'
2
+
3
module SpsBill
  # All the bill scanning and parsing intelligence.
  #
  # The parse_* commands read the document through +reader+ (using its
  # +text_position+ and +text_in_rect+ methods) and +do_complete_parse+ reports
  # failures against +source_file+; both are expected to be supplied by the
  # object that mixes this module in.
  module BillParser

    ELECTRICITY_SERVICE_HEAD = "Electricity Services"
    GAS_SERVICE_HEAD = "Gas Services by City Gas Pte Ltd"
    WATER_SERVICE_HEAD = "Water Services by Public Utilities Board"

    # Returns a collection of parser errors
    def errors
      @errors ||= []
    end

    # Command: scans and extracts billing details from the pdf doc.
    # Runs every public parse_* method; a failure in one is recorded in +errors+
    # and does not stop the others. Does nothing when there is no reader.
    def do_complete_parse
      return unless reader
      methods.select { |name| name.to_s =~ /^parse_/ }.each do |name|
        begin
          send(name)
        rescue => e
          errors << "failure parsing #{source_file}:#{name} #{e.inspect}"
        end
      end
    end

    # Command: extracts the account number
    def parse_account_number
      @account_number = reader.text_in_rect(383.0,999.0,785.0,790.0,1).flatten.join('')
    end

    # Command: extracts the total amount due for the current month
    # (nil when the "Total Current Charges" line is not found)
    def parse_total_amount
      ref = reader.text_position(/^Total Current Charges due on/)
      @total_amount = if ref
        total_parts = reader.text_in_rect(ref[:x] + 1,400.0,ref[:y] - 1,ref[:y] + 1,1)
        scanned_number(total_parts.flatten.first)
      end
    end

    # Command: extracts the invoice date (nil when "Dated" is not found)
    def parse_invoice_date
      ref = reader.text_position("Dated")
      @invoice_date = if ref
        date_parts = reader.text_in_rect(ref[:x] + 1,999.0,ref[:y] - 1,ref[:y] + 1,1)
        Date.parse(date_parts.first.join('-'))
      end
    end

    # Command: extracts the invoice month (as Date, set to 1st of the month).
    # The year is the last item after "Dated"; the month is the first item of the
    # text found to the left of "Dated" on the same line.
    def parse_invoice_month
      ref = reader.text_position("Dated")
      @invoice_month = if ref
        date_parts = reader.text_in_rect(ref[:x] + 1,999.0,ref[:y] - 1,ref[:y] + 1,1)
        m_parts = reader.text_in_rect(ref[:x]-200,ref[:x]-1,ref[:y] - 1,ref[:y] + 1,1)
        Date.parse("#{date_parts.first.last}-#{m_parts.first.first}-01")
      end
    end

    # Command: extracts an array of electricity usage charges. Each charge is a Hash:
    #   { kwh: float, rate: float, amount: float }
    # The section ends at the gas heading or, failing that, the water heading;
    # the result is nil when either boundary is missing.
    def parse_electricity_usage
      upper_ref = reader.text_position(ELECTRICITY_SERVICE_HEAD)
      @electricity_usage = if upper_ref
        lower_ref = reader.text_position(GAS_SERVICE_HEAD) || reader.text_position(WATER_SERVICE_HEAD)
        scan_usage_charges(upper_ref, lower_ref, :kwh, /kwh/i) if lower_ref
      end
    end

    # Command: extracts an array of gas usage charges. Each charge is a Hash:
    #   { kwh: float, rate: float, amount: float }
    # The section ends at the water heading; nil when either boundary is missing.
    def parse_gas_usage
      upper_ref = reader.text_position(GAS_SERVICE_HEAD)
      @gas_usage = if upper_ref
        lower_ref = reader.text_position(WATER_SERVICE_HEAD)
        scan_usage_charges(upper_ref, lower_ref, :kwh, /kwh/i) if lower_ref
      end
    end

    # Command: extracts an array of water usage charges. Each charge is a Hash:
    #   { cubic_m: float, rate: float, amount: float }
    # The section ends at "Waterborne Fee"; nil when either boundary is missing.
    def parse_water_usage
      upper_ref = reader.text_position(WATER_SERVICE_HEAD)
      @water_usage = if upper_ref
        lower_ref = reader.text_position("Waterborne Fee")
        scan_usage_charges(upper_ref, lower_ref, :cubic_m, /cu m/i) if lower_ref
      end
    end

    # Helpers are private and deliberately not named parse_*, so that
    # do_complete_parse never tries to run them as commands.
    private

    # Converts scanned text to a Float. Thousands separators are removed first
    # because String#to_f stops at the first comma ("1,251.44".to_f is 1.0).
    def scanned_number(text)
      text.to_s.delete(',').to_f
    end

    # Reads the charge lines found between two section headings and converts each
    # line to a Hash of +quantity_key+, :rate and :amount. +unit_pattern+ is the
    # unit text stripped from the quantity column.
    def scan_usage_charges(upper_ref, lower_ref, quantity_key, unit_pattern)
      raw_data = reader.text_in_rect(240.0,450.0,lower_ref[:y]+1,upper_ref[:y],1)
      raw_data.map do |line|
        {
          quantity_key => scanned_number(line[0].gsub(unit_pattern,'')),
          :rate => scanned_number(line[1]),
          :amount => scanned_number(line[2])
        }
      end
    end

  end
end
@@ -0,0 +1,71 @@
1
module SpsBill
  # Command line front end: picks a dataset according to the parsed options and
  # prints it to standard output as comma-separated rows.
  class Shell
    attr_accessor :options, :fileset

    # command line options definition
    OPTIONS = %w(help verbose csv+ raw+ data=s)

    # Usage message
    def self.usage
      puts <<-EOS

  SP Services Bill Scanner v#{SpsBill::Version::STRING}
  ===================================

  Usage:
    sps_bill [options] [pdf files]

  Command Options
    --help           shows this message
    -r | --raw       raw data format (without headers)
    -c | --csv       output in CSV format (default)
    -d= | --data=[charges,electricity,gas,water,all]

      EOS
    end

    # +new+
    # +options+ is the hash of parsed command line options (nil is treated as
    # no options). The files to scan are whatever is left in ARGV.
    def initialize(options)
      @fileset = ARGV
      @options = options || {}
    end

    # Prints the usage message when help was requested, otherwise exports the
    # dataset selected by the first letter of the :data option
    # (c/e/g/w; anything else, including no option, exports all data).
    def run
      if options[:help]
        self.class.usage
        return
      end
      case options[:data]
      when /^c/
        export(:total_amounts)
      when /^e/
        export(:electricity_usages)
      when /^g/
        export(:gas_usages)
      when /^w/
        export(:water_usages)
      else
        export(:all_data)
      end
    end

    # The collection of bills loaded from +fileset+ (loaded once, on first use).
    def bills
      @bills ||= SpsBill::BillCollection.load(fileset)
    end

    # Prints the header line followed by the rows of the dataset named by
    # +dataset_selector+ (a BillCollection method name).
    def export(dataset_selector)
      format_header bills.headers(dataset_selector)
      format_rows bills.public_send(dataset_selector)
    end

    # Prints each row as a comma-separated line (nil values print as empty fields).
    def format_rows(data)
      data.each { |row| puts row.join(',') }
    end

    # Prints the column names unless raw output was requested.
    def format_header(data)
      return if options[:raw]
      puts data.join(',')
    end

  end
end
@@ -0,0 +1,9 @@
1
module SpsBill
  # Gem version, available as its individual numeric parts and as the
  # dotted string built from them.
  class Version
    MAJOR, MINOR, PATCH = 0, 1, 0

    # nil parts are dropped before joining
    STRING = [MAJOR, MINOR, PATCH].compact * '.'
  end
end
data/lib/sps_bill.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'pdf-reader'
2
+ require 'pdf/object_hash'
3
+ require 'pdf/positional_text_receiver'
4
+ require 'pdf/textangle'
5
+ require 'pdf/structured_reader'
6
+
7
+ module SpsBill
8
+ end
9
+ require 'sps_bill/version'
10
+ require 'sps_bill/bill_collection'
11
+ require 'sps_bill/bill_parser'
12
+ require 'sps_bill/bill'
13
+ require 'sps_bill/shell'
File without changes
@@ -0,0 +1,20 @@
1
#!/bin/bash
#
# Demonstrates how to invoke the sps_bill command line to extract
# all billing data for a set of files
#
# Example Usage:
#
#   scan_all_bills.sh my_bills/*.pdf > my_bill_data.csv
#
# Resolve the directory holding this script; dirname also copes with the
# script being started without any path component in $0.
scriptPath="$(cd "$(dirname "$0")" && pwd)"

v_files=${1:-help}

if [ "${v_files}" == "help" ]
then
  echo "usage: scan_all_bills.sh ./path/to_pdf_files*.pdf"
  exit
fi

# "$@" passes every file name through unchanged, including names with spaces
"${scriptPath}/../bin/sps_bill" --data=all "$@"
File without changes
@@ -0,0 +1,71 @@
1
+ <html>
2
+ <head></head>
3
+ %PDF-1.3
4
+ %����
5
+ 1 0 obj
6
+ << /Creator <feff0050007200610077006e>
7
+ /Producer <feff0050007200610077006e>
8
+ >>
9
+ endobj
10
+ 2 0 obj
11
+ << /Type /Catalog
12
+ /Pages 3 0 R
13
+ >>
14
+ endobj
15
+ 3 0 obj
16
+ << /Type /Pages
17
+ /Count 1
18
+ /Kids [5 0 R]
19
+ >>
20
+ endobj
21
+ 4 0 obj
22
+ << /Length 157
23
+ >>
24
+ stream
25
+ q
26
+
27
+ BT
28
+ 36 747.384 Td
29
+ /F1.0 12 Tf
30
+ [<546869732050444620636f6e7461696e73206a756e6b20626566> 30 <6f72652074686520252d504446206d6172> -15 <6b> 20 <6572>] TJ
31
+ ET
32
+
33
+ Q
34
+
35
+ endstream
36
+ endobj
37
+ 5 0 obj
38
+ << /Type /Page
39
+ /Parent 3 0 R
40
+ /MediaBox [0 0 612.0 792.0]
41
+ /Contents 4 0 R
42
+ /Resources << /ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
43
+ /Font << /F1.0 6 0 R
44
+ >>
45
+ >>
46
+ >>
47
+ endobj
48
+ 6 0 obj
49
+ << /Type /Font
50
+ /Subtype /Type1
51
+ /BaseFont /Helvetica
52
+ /Encoding /WinAnsiEncoding
53
+ >>
54
+ endobj
55
+ xref
56
+ 0 7
57
+ 0000000000 65535 f
58
+ 0000000015 00000 n
59
+ 0000000109 00000 n
60
+ 0000000158 00000 n
61
+ 0000000215 00000 n
62
+ 0000000423 00000 n
63
+ 0000000601 00000 n
64
+ trailer
65
+ << /Size 7
66
+ /Root 2 0 R
67
+ /Info 1 0 R
68
+ >>
69
+ startxref
70
+ 698
71
+ %%EOF
File without changes
@@ -0,0 +1,48 @@
1
+ # rename this file to expectations.yml and use it to
2
+ # test all your SPS bill files that you copy to spec/fixtures/personal_pdf_samples.
3
+ #
4
+ # For each bill file, create a section below (like the examples shown) and
5
+ # update it with the actual values that should come from the bill.
6
+ #
7
+ # This is a YAML-format file, so beware that indentation is significant
8
+ ---
9
+ my_sps_bill-2012-02.pdf:
10
+ :account_number: '8123123123'
11
+ :invoice_date: 2012-02-29
12
+ :invoice_month: 2012-02-01
13
+ :total_amount: 251.44
14
+ :electricity_usage:
15
+ - :kwh: 4.0
16
+ :rate: 0.241
17
+ :amount: 0.97
18
+ - :kwh: 616.0
19
+ :rate: 0.2558
20
+ :amount: 157.57
21
+ :gas_usage:
22
+ - :kwh: 18.0
23
+ :rate: 0.1799
24
+ :amount: 3.24
25
+ :water_usage:
26
+ - :cubic_m: 36.1
27
+ :rate: 1.17
28
+ :amount: 42.24
29
+ - :cubic_m: -3.0
30
+ :rate: 1.4
31
+ :amount: -4.2
32
+ my_sps_bill-2012-03.pdf:
33
+ :account_number: '8123123123'
34
+ :invoice_date: 2012-03-31
35
+ :invoice_month: 2012-03-01
36
+ :total_amount: 235.7
37
+ :electricity_usage:
38
+ - :kwh: 519.0
39
+ :rate: 0.2558
40
+ :amount: 132.76
41
+ :gas_usage:
42
+ - :kwh: 15.0
43
+ :rate: 0.1799
44
+ :amount: 2.7
45
+ :water_usage:
46
+ - :cubic_m: 38.7
47
+ :rate: 1.17
48
+ :amount: 45.28
@@ -0,0 +1,74 @@
1
+ require 'spec_helper'
2
+ include PdfSamplesHelper
3
+
4
describe "Personal PDF Samples" do

  if personal_pdf_sample_names.empty?
    pending %(

    You can place the PDFs of your own bills in spec/fixtures/personal_pdf_samples.
    They will be tested when you run the specs, but are hidden from being added to the
    git repository.

    )
  end

  # Every *.pdf file found in spec/fixtures/personal_pdf_samples gets a basic
  # structural check; nothing needs to be configured for this part.
  personal_pdf_sample_names.each do |sample|
    describe sample do
      let(:sample_name) { sample }
      let(:bill) { SpsBill::Bill.new(sample_name) }

      it_behaves_like "has a valid reader", :bill
      it_behaves_like "has a valid account number", :bill
      it_behaves_like "has a valid invoice date", :bill

    end
  end

  # Bills listed in spec/fixtures/personal_pdf_samples/expectations.yml are
  # additionally checked against the values recorded there. Only the
  # attributes present for a bill are verified.
  #
  # See spec/fixtures/personal_pdf_samples/expectations.yml.sample
  # for details on how to setup the expectations.yml file
  #
  checked_attributes = [
    :account_number,
    :total_amount,
    :invoice_date,
    :invoice_month,
    :electricity_usage,
    :gas_usage,
    :water_usage
  ]

  personal_pdf_sample_expectations.each do |sample_name,expectations|
    describe sample_name do
      let(:sample_file) { personal_pdf_sample_path.join(sample_name).to_s }
      let(:bill) { SpsBill::Bill.new(sample_file) }
      subject { bill }

      checked_attributes.each do |attribute|
        next unless expectations[attribute]
        its(attribute) { should eql(expectations[attribute]) }
      end

    end
  end

end
@@ -0,0 +1,24 @@
1
+ require 'sps_bill'
2
+
3
+ # Requires supporting files with custom matchers and macros, etc,
4
+ # in ./support/ and its subdirectories.
5
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
6
+
7
# == Mock Framework
#
# The specs use RSpec's own mocking. To use mocha, flexmock or RR instead,
# pass :mocha, :flexmock or :rr to mock_with.
RSpec.configure { |config| config.mock_with :rspec }
@@ -0,0 +1,31 @@
1
shared_examples_for "has a valid reader" do |resource_key|
  # args:
  # +resource_key+ is the sym for the resource to test
  describe "#reader" do
    # look the resource up by calling the method named by resource_key
    # (avoids evaluating a string as code)
    let(:resource) { send(resource_key) }
    let(:reader) { resource.reader }
    subject { reader }
    it { should be_a(PDF::StructuredReader) }
  end
end
11
+
12
shared_examples_for "has a valid account number" do |resource_key|
  # args:
  # +resource_key+ is the sym for the resource to test
  describe "#account_number" do
    # look the resource up by calling the method named by resource_key
    # (avoids evaluating a string as code)
    let(:resource) { send(resource_key) }
    subject { resource.account_number }
    it { should be_a(String) }
    # \A and \z anchor the whole string; ^ and $ would accept any single
    # all-digit line inside a multi-line value
    it { should match(/\A\d+\z/) }
  end
end
22
+
23
shared_examples_for "has a valid invoice date" do |resource_key|
  # args:
  # +resource_key+ is the sym for the resource to test
  describe "#invoice_date" do
    # look the resource up by calling the method named by resource_key
    # (avoids evaluating a string as code)
    let(:resource) { send(resource_key) }
    subject { resource.invoice_date }
    it { should be_a(Date) }
  end
end
@@ -0,0 +1,37 @@
1
+ require 'pathname'
2
+ require 'yaml'
3
+
4
# Helpers for locating the PDF fixtures (and their expected values) used by the specs.
# All paths are resolved relative to the directory containing this file.
module PdfSamplesHelper

  # Directory (Pathname) of the sample PDFs.
  def pdf_sample_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','pdf_samples')
  end

  # Directory (Pathname) where personal bills may be dropped for testing.
  def personal_pdf_sample_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','personal_pdf_samples')
  end

  # Full name (String) of the bill_a.pdf sample.
  def pdf_sample_name
    pdf_sample_path.join('bill_a.pdf').to_s
  end

  # Full name (String) of the junk_prefix.pdf sample.
  def junk_prefix_pdf_sample_name
    pdf_sample_path.join('junk_prefix.pdf').to_s
  end

  # Names of all personal PDF samples; empty when there are none.
  # Sorted so that the result does not depend on directory listing order.
  def personal_pdf_sample_names
    Dir["#{File.dirname(__FILE__)}/../fixtures/personal_pdf_samples/*.pdf"].sort
  end

  # Location (Pathname) of the optional expectations file for personal samples.
  def personal_pdf_sample_expectations_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','personal_pdf_samples','expectations.yml')
  end

  # Returns the contents of the expectations file, or an empty Hash when the
  # file is missing, empty or cannot be loaded (a warning is printed in the
  # last case, so that a broken file does not silently disable the checks).
  def personal_pdf_sample_expectations
    path = personal_pdf_sample_expectations_path.to_s
    return {} unless File.exist?(path)
    # The expectations use Symbol keys and Date values, which newer versions of
    # the YAML library refuse to build through load_file; unsafe_load_file is
    # used where it exists. NOTE: this is only appropriate because the file is
    # a local fixture written by the developer - never use it on untrusted input.
    loaded = if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(path)
    else
      YAML.load_file(path)
    end
    # an empty document loads as a false value
    loaded || {}
  rescue StandardError => e
    warn "ignoring personal PDF sample expectations: #{e.class}: #{e.message}"
    {}
  end
end