sps_bill 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,102 @@
# SpsBill::BillCollection is an Array-like class that represents a collection
# of SP Services PDF bills.
#
# The <tt>load</tt> method is used to initialise the collection given a path specification.
#
# A range of collection methods are provided to extract sets of data
# from the entire collection (e.g. <tt>electricity_usages</tt>). Every one of
# them returns a plain Array of row Arrays (one row per charge line).
#
module SpsBill
  class BillCollection < Array

    # Column names for each dataset, keyed by dataset selector.
    HEADERS = {
      :total_amounts      => ['invoice_month','amount'],
      :electricity_usages => ['invoice_month','kwh','rate','amount'],
      :gas_usages         => ['invoice_month','kwh','rate','amount'],
      :water_usages       => ['invoice_month','cubic_m','rate','amount'],
      :all_data           => ['invoice_month','measure','kwh','cubic_m','rate','amount']
    }.freeze

    class << self

      # Returns a BillCollection of Bill objects for PDF files matching +path_spec+.
      # +path_spec+ may be either:
      # - an array of filenames e.g. ['data/file1.pdf','file2.pdf']
      # - or a single file or path spec e.g. './somepath/file1.pdf' or './somepath/*.pdf'
      def load(path_spec)
        filenames = path_spec.is_a?(Array) ? path_spec : Dir[path_spec]
        filenames.each_with_object(new) do |filename, collection|
          collection << SpsBill::Bill.new(filename)
        end
      end

    end

    # Returns the array of column names for +dataset_selector+
    # (:total_amounts, :electricity_usages, :gas_usages, :water_usages or :all_data).
    # Returns nil for any other selector.
    def headers(dataset_selector)
      columns = HEADERS[dataset_selector]
      # hand out a copy so callers may modify the result freely
      columns && columns.dup
    end

    # Returns an array of all data rows, dataset after dataset
    #   [[month,measure,kwh,cubic_m,rate,amount]]
    # measure: total_charges,electricity,gas,water
    def all_data
      total_amounts(:all) + electricity_usages(:all) + gas_usages(:all) + water_usages(:all)
    end

    # Returns an array of total bill amounts by month
    #   [[month,amount]]
    # With any +style+ other than :solo the rows use the all_data layout.
    def total_amounts(style=:solo)
      map do |bill|
        month = bill.invoice_month.to_s
        if style == :solo
          [month, bill.total_amount]
        else
          [month, 'total_charges', nil, nil, nil, bill.total_amount]
        end
      end
    end

    # Returns an array of electricity usage charges by month
    #   [[month,kwh,rate,amount]]
    # With any +style+ other than :solo the rows use the all_data layout.
    def electricity_usages(style=:solo)
      usage_rows(:electricity_usage, 'electricity', :kwh, style)
    end

    # Returns an array of gas usage charges by month
    #   [[month,kwh,rate,amount]]
    # With any +style+ other than :solo the rows use the all_data layout.
    def gas_usages(style=:solo)
      usage_rows(:gas_usage, 'gas', :kwh, style)
    end

    # Returns an array of water usage charges by month
    #   [[month,cubic_m,rate,amount]]
    # With any +style+ other than :solo the rows use the all_data layout.
    def water_usages(style=:solo)
      usage_rows(:water_usage, 'water', :cubic_m, style)
    end

    private

    # Builds the rows for one kind of usage across all bills.
    # +usage_reader+ is the bill method returning the usage hashes,
    # +measure+ the label used in the all_data layout and
    # +quantity_key+ the hash key holding the consumed quantity.
    def usage_rows(usage_reader, measure, quantity_key, style)
      each_with_object([]) do |bill, rows|
        # a bill without this section reports nil rather than an empty list
        (bill.public_send(usage_reader) || []).each do |usage|
          month    = bill.invoice_month.to_s
          quantity = usage[quantity_key]
          rows << if style == :solo
            [month, quantity, usage[:rate], usage[:amount]]
          elsif quantity_key == :cubic_m
            [month, measure, nil, quantity, usage[:rate], usage[:amount]]
          else
            [month, measure, quantity, nil, usage[:rate], usage[:amount]]
          end
        end
      end
    end

  end
end
@@ -0,0 +1,92 @@
1
+ require 'date'
2
+
# all the bill scanning and parsing intelligence
#
# This module is meant to be mixed into an object that provides +reader+
# (responding to +text_position+ and +text_in_rect+) and +source_file+.
# Every public +parse_*+ method stores its result in the instance variable
# of the same name (e.g. parse_total_amount sets @total_amount).
module SpsBill
  module BillParser

    ELECTRICITY_SERVICE_HEAD = "Electricity Services"
    GAS_SERVICE_HEAD = "Gas Services by City Gas Pte Ltd"
    WATER_SERVICE_HEAD = "Water Services by Public Utilities Board"

    # Returns a collection of parser errors
    def errors
      @errors ||= []
    end

    # Command: scans and extracts billing details from the pdf doc.
    # Runs every public parse_* method; a failure in one of them is recorded
    # in +errors+ and does not stop the remaining parsers.
    def do_complete_parse
      return unless reader
      methods.select { |m| m.to_s.start_with?('parse_') }.each do |m|
        begin
          send(m)
        rescue => e
          errors << "failure parsing #{source_file}:#{m} #{e.inspect}"
        end
      end
    end

    # Command: extracts the account number
    def parse_account_number
      @account_number = reader.text_in_rect(383.0,999.0,785.0,790.0,1).flatten.join('')
    end

    # Command: extracts the total amount due for the current month
    # (nil when the "Total Current Charges due on" label is not found)
    def parse_total_amount
      @total_amount = if ref = reader.text_position(/^Total Current Charges due on/)
        total_parts = reader.text_in_rect(ref[:x] + 1,400.0,ref[:y] - 1,ref[:y] + 1,1)
        numeric_value(total_parts.flatten.first)
      end
    end

    # Command: extracts the invoice date
    # (nil when the "Dated" label is not found)
    def parse_invoice_date
      @invoice_date = if ref = reader.text_position("Dated")
        date_parts = text_after(ref)
        Date.parse(date_parts.first.join('-'))
      end
    end

    # Command: extracts the invoice month (as Date, set to 1st of the month)
    def parse_invoice_month
      @invoice_month = if ref = reader.text_position("Dated")
        date_parts = text_after(ref)
        # the month label is read from the text to the left of "Dated"
        m_parts = reader.text_in_rect(ref[:x]-200,ref[:x]-1,ref[:y] - 1,ref[:y] + 1,1)
        Date.parse("#{date_parts.first.last}-#{m_parts.first.first}-01")
      end
    end

    # Command: extracts an array of electricity usage charges. Each charge is a Hash:
    #   { kwh: float, rate: float, amount: float }
    # The section ends at the gas heading or, failing that, the water heading.
    def parse_electricity_usage
      @electricity_usage = if upper_ref = reader.text_position(ELECTRICITY_SERVICE_HEAD)
        lower_ref = reader.text_position(GAS_SERVICE_HEAD) || reader.text_position(WATER_SERVICE_HEAD)
        extract_usage_between(upper_ref, lower_ref, :kwh, /kwh/i)
      end
    end

    # Command: extracts an array of gas usage charges. Each charge is a Hash:
    #   { kwh: float, rate: float, amount: float }
    def parse_gas_usage
      @gas_usage = if upper_ref = reader.text_position(GAS_SERVICE_HEAD)
        extract_usage_between(upper_ref, reader.text_position(WATER_SERVICE_HEAD), :kwh, /kwh/i)
      end
    end

    # Command: extracts an array of water usage charges. Each charge is a Hash:
    #   { cubic_m: float, rate: float, amount: float }
    def parse_water_usage
      @water_usage = if upper_ref = reader.text_position(WATER_SERVICE_HEAD)
        extract_usage_between(upper_ref, reader.text_position("Waterborne Fee"), :cubic_m, /cu m/i)
      end
    end

    private

    # NB: helpers below are private and deliberately not named parse_*,
    # otherwise do_complete_parse would try to run them as parsers.

    # Returns the text found on the same line as +ref+, to its right.
    def text_after(ref)
      reader.text_in_rect(ref[:x] + 1,999.0,ref[:y] - 1,ref[:y] + 1,1)
    end

    # Returns the usage charges listed between the +upper_ref+ and +lower_ref+
    # headings, or nil when +lower_ref+ was not found.
    # +quantity_key+ names the quantity in each resulting Hash and
    # +unit+ matches the unit label to strip from the quantity text.
    def extract_usage_between(upper_ref, lower_ref, quantity_key, unit)
      return unless lower_ref
      raw_data = reader.text_in_rect(240.0,450.0,lower_ref[:y]+1,upper_ref[:y],1)
      raw_data.map do |l|
        {
          quantity_key => numeric_value(l[0].gsub(unit,'')),
          :rate        => numeric_value(l[1]),
          :amount      => numeric_value(l[2])
        }
      end
    end

    # Converts extracted text to a Float. Thousands separators are dropped
    # first, because "1,234.56".to_f would otherwise stop at the comma and
    # return 1.0. nil converts to 0.0, as nil.to_f does.
    def numeric_value(text)
      text.to_s.delete(',').to_f
    end

  end
end
@@ -0,0 +1,71 @@
# Command line front-end: prints the selected dataset of a collection of
# bills to standard output as comma-separated rows.
module SpsBill
  class Shell
    attr_accessor :options, :fileset

    # command line options definition
    OPTIONS = %w(help verbose csv+ raw+ data=s)

    # Usage message
    def self.usage
      puts <<-EOS

SP Services Bill Scanner v#{SpsBill::Version::STRING}
===================================

Usage:
sps_bill [options]

Command Options
-r | --raw raw data format (without headers)
-c | --csv output in CSV format (default)
-d= | --data=[charges,electricity,gas,water,all]

      EOS
    end

    # +new+
    # +options+ is the parsed command line (anything indexable by option name,
    # nil is treated as no options). The files to scan are taken from ARGV.
    def initialize(options)
      @fileset = ARGV
      # NOTE(review): the block has no effect of its own; what gets stored is
      # whatever +each+ returns (the receiver itself for a Hash). Kept as is
      # because the real options object is not visible here - confirm before simplifying.
      @options = (options||{}).each{|k,v| {k => v} }
    end

    # Prints the usage message when :help is set; otherwise exports the
    # dataset chosen by the first letter of options[:data]
    # (c=charges, e=electricity, g=gas, w=water, anything else = all data).
    def run
      if options[:help]
        self.class.usage
        return
      end
      case options[:data]
      when /^c/
        export(:total_amounts)
      when /^e/
        export(:electricity_usages)
      when /^g/
        export(:gas_usages)
      when /^w/
        export(:water_usages)
      else
        export(:all_data)
      end
    end

    # Returns the collection of bills for +fileset+ (loaded on first use)
    def bills
      @bills ||= SpsBill::BillCollection.load(fileset)
    end

    # Prints the header line and the rows of the dataset named by +dataset_selector+
    def export(dataset_selector)
      format_header bills.headers(dataset_selector)
      # public_send: only the collection's public dataset methods may be selected
      format_rows bills.public_send(dataset_selector)
    end

    # Prints each row of +data+ as one comma-separated line
    def format_rows(data)
      data.each do |row|
        puts row.join(',')
      end
    end

    # Prints the column names, unless raw output was requested or the
    # dataset has no headers (+data+ is nil)
    def format_header(data)
      return if options[:raw] || data.nil?
      puts data.join(',')
    end

  end
end
@@ -0,0 +1,9 @@
module SpsBill
  # Version number of the sps_bill gem, split into its components.
  class Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0

    # Dotted version string built from the components above (nil components
    # are skipped). Frozen so the shared constant cannot be modified in place.
    STRING = [MAJOR, MINOR, PATCH].compact.join('.').freeze
  end
end
data/lib/sps_bill.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'pdf-reader'
2
+ require 'pdf/object_hash'
3
+ require 'pdf/positional_text_receiver'
4
+ require 'pdf/textangle'
5
+ require 'pdf/structured_reader'
6
+
7
+ module SpsBill
8
+ end
9
+ require 'sps_bill/version'
10
+ require 'sps_bill/bill_collection'
11
+ require 'sps_bill/bill_parser'
12
+ require 'sps_bill/bill'
13
+ require 'sps_bill/shell'
File without changes
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Demonstrates how to invoke the sps_bill command line to extract
# all billing data for a set of files
#
# Example Usage:
#
#   scan_all_bills.sh my_bills/*.pdf > my_bill_data.csv
#
# Calling it without arguments, or with "help" as the first argument,
# prints a usage line instead.
#

# Directory holding this script. dirname is used rather than ${0%/*} so the
# path is still correct when $0 contains no slash (e.g. "bash scan_all_bills.sh").
scriptPath="$(dirname "$0")"

v_files=${1:-help}

if [ "${v_files}" = "help" ]
then
  echo "usage: scan_all_bills.sh ./path/to_pdf_files*.pdf"
  exit
fi

# "$@" hands every file name over unchanged, including names containing spaces
"${scriptPath}/../bin/sps_bill" --data=all "$@"
File without changes
@@ -0,0 +1,71 @@
1
+ <html>
2
+ <head></head>
3
+ %PDF-1.3
4
+ %����
5
+ 1 0 obj
6
+ << /Creator <feff0050007200610077006e>
7
+ /Producer <feff0050007200610077006e>
8
+ >>
9
+ endobj
10
+ 2 0 obj
11
+ << /Type /Catalog
12
+ /Pages 3 0 R
13
+ >>
14
+ endobj
15
+ 3 0 obj
16
+ << /Type /Pages
17
+ /Count 1
18
+ /Kids [5 0 R]
19
+ >>
20
+ endobj
21
+ 4 0 obj
22
+ << /Length 157
23
+ >>
24
+ stream
25
+ q
26
+
27
+ BT
28
+ 36 747.384 Td
29
+ /F1.0 12 Tf
30
+ [<546869732050444620636f6e7461696e73206a756e6b20626566> 30 <6f72652074686520252d504446206d6172> -15 <6b> 20 <6572>] TJ
31
+ ET
32
+
33
+ Q
34
+
35
+ endstream
36
+ endobj
37
+ 5 0 obj
38
+ << /Type /Page
39
+ /Parent 3 0 R
40
+ /MediaBox [0 0 612.0 792.0]
41
+ /Contents 4 0 R
42
+ /Resources << /ProcSet [/PDF /Text /ImageB /ImageC /ImageI]
43
+ /Font << /F1.0 6 0 R
44
+ >>
45
+ >>
46
+ >>
47
+ endobj
48
+ 6 0 obj
49
+ << /Type /Font
50
+ /Subtype /Type1
51
+ /BaseFont /Helvetica
52
+ /Encoding /WinAnsiEncoding
53
+ >>
54
+ endobj
55
+ xref
56
+ 0 7
57
+ 0000000000 65535 f
58
+ 0000000015 00000 n
59
+ 0000000109 00000 n
60
+ 0000000158 00000 n
61
+ 0000000215 00000 n
62
+ 0000000423 00000 n
63
+ 0000000601 00000 n
64
+ trailer
65
+ << /Size 7
66
+ /Root 2 0 R
67
+ /Info 1 0 R
68
+ >>
69
+ startxref
70
+ 698
71
+ %%EOF
File without changes
@@ -0,0 +1,48 @@
1
+ # rename this file to expectations.yml and use it to
2
+ # test all your SPS bill files that you copy to spec/fixtures/personal_pdf_samples.
3
+ #
4
+ # For each bill file, create a section below (like the examples shown) and
5
+ # update it with the actual values that should come from the bill.
6
+ #
7
+ # This is a YAML-format file, so beware that indentation is significant
8
+ ---
9
+ my_sps_bill-2012-02.pdf:
10
+ :account_number: '8123123123'
11
+ :invoice_date: 2012-02-29
12
+ :invoice_month: 2012-02-01
13
+ :total_amount: 251.44
14
+ :electricity_usage:
15
+ - :kwh: 4.0
16
+ :rate: 0.241
17
+ :amount: 0.97
18
+ - :kwh: 616.0
19
+ :rate: 0.2558
20
+ :amount: 157.57
21
+ :gas_usage:
22
+ - :kwh: 18.0
23
+ :rate: 0.1799
24
+ :amount: 3.24
25
+ :water_usage:
26
+ - :cubic_m: 36.1
27
+ :rate: 1.17
28
+ :amount: 42.24
29
+ - :cubic_m: -3.0
30
+ :rate: 1.4
31
+ :amount: -4.2
32
+ my_sps_bill-2012-03.pdf:
33
+ :account_number: '8123123123'
34
+ :invoice_date: 2012-03-31
35
+ :invoice_month: 2012-03-01
36
+ :total_amount: 235.7
37
+ :electricity_usage:
38
+ - :kwh: 519.0
39
+ :rate: 0.2558
40
+ :amount: 132.76
41
+ :gas_usage:
42
+ - :kwh: 15.0
43
+ :rate: 0.1799
44
+ :amount: 2.7
45
+ :water_usage:
46
+ - :cubic_m: 38.7
47
+ :rate: 1.17
48
+ :amount: 45.28
@@ -0,0 +1,74 @@
1
+ require 'spec_helper'
2
+ include PdfSamplesHelper
3
+
describe "Personal PDF Samples" do

  if personal_pdf_sample_names.empty?
    pending %(

You can place the PDFs of your own bills in spec/fixtures/personal_pdf_samples.
They will be tested when you run the specs, but are hidden from being added to the
git repository.

)
  end

  # Every *.pdf file found in spec/fixtures/personal_pdf_samples gets a basic
  # structural check; nothing needs to be configured for this.
  personal_pdf_sample_names.each do |sample|
    describe sample do
      let(:sample_name) { sample }
      let(:bill) { SpsBill::Bill.new(sample_name) }

      it_behaves_like "has a valid reader", :bill
      it_behaves_like "has a valid account number", :bill
      it_behaves_like "has a valid invoice date", :bill
    end
  end

  # Bills listed in spec/fixtures/personal_pdf_samples/expectations.yml are
  # additionally checked against the values recorded there. Only the
  # attributes that have an expected value are tested.
  #
  # See spec/fixtures/personal_pdf_samples/expectations.yml.sample
  # for details on how to set up the expectations.yml file
  #
  personal_pdf_sample_expectations.each do |sample_name,expectations|
    describe sample_name do
      let(:sample_file) { personal_pdf_sample_path.join(sample_name).to_s }
      let(:bill) { SpsBill::Bill.new(sample_file) }
      subject { bill }

      [
        :account_number,
        :total_amount,
        :invoice_date,
        :invoice_month,
        :electricity_usage,
        :gas_usage,
        :water_usage
      ].each do |attribute|
        next unless expectations[attribute]
        its(attribute) { should eql(expectations[attribute]) }
      end
    end
  end

end
@@ -0,0 +1,24 @@
1
+ require 'sps_bill'
2
+
# Load every supporting file (custom matchers, macros, shared examples, etc.)
# found in ./support/ and its subdirectories.
support_glob = File.join(File.dirname(__FILE__), 'support', '**', '*.rb')
Dir[support_glob].each { |support_file| require support_file }

RSpec.configure do |rspec|
  # == Mock Framework
  #
  # RSpec's own mocks are used. To use mocha, flexmock or RR instead,
  # replace the active line with the appropriate one of:
  #
  # rspec.mock_with :mocha
  # rspec.mock_with :flexmock
  # rspec.mock_with :rr
  rspec.mock_with :rspec

  # ActiveRecord-only settings, left disabled:
  # rspec.fixture_path = "#{::Rails.root}/spec/fixtures"
  # rspec.use_transactional_fixtures = true
end
@@ -0,0 +1,31 @@
# Shared examples: the resource exposes a PDF::StructuredReader as +reader+.
shared_examples_for "has a valid reader" do |resource_key|
  # args:
  # +resource_key+ is the sym naming the helper (e.g. a +let+) that returns the resource to test
  describe "#reader" do
    # look the resource up by name with send rather than eval-ing a string
    let(:resource) { send(resource_key) }
    let(:reader) { resource.reader }
    subject { reader }
    it { should be_a(PDF::StructuredReader) }
  end
end
11
+
# Shared examples: the resource's +account_number+ is a String of digits only.
shared_examples_for "has a valid account number" do |resource_key|
  # args:
  # +resource_key+ is the sym naming the helper (e.g. a +let+) that returns the resource to test
  describe "#account_number" do
    # look the resource up by name with send rather than eval-ing a string
    let(:resource) { send(resource_key) }
    subject { resource.account_number }
    it { should be_a(String) }
    # \A and \z anchor the whole string; ^ and $ would accept any single
    # all-digit line inside a multi-line value
    it { should match(/\A\d+\z/) }
  end
end
22
+
# Shared examples: the resource's +invoice_date+ is a Date.
shared_examples_for "has a valid invoice date" do |resource_key|
  # args:
  # +resource_key+ is the sym naming the helper (e.g. a +let+) that returns the resource to test
  describe "#invoice_date" do
    # look the resource up by name with send rather than eval-ing a string
    let(:resource) { send(resource_key) }
    subject { resource.invoice_date }
    it { should be_a(Date) }
  end
end
@@ -0,0 +1,37 @@
1
+ require 'pathname'
2
+ require 'yaml'
3
+
# Helpers for locating the PDF fixtures used by the specs.
# All locations are resolved relative to the directory of this file.
module PdfSamplesHelper

  # Pathname of the directory holding the sample PDFs kept with the project
  def pdf_sample_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','pdf_samples')
  end

  # Pathname of the directory where personal bill PDFs may be placed
  def personal_pdf_sample_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','personal_pdf_samples')
  end

  # Filename (String) of the standard sample bill
  def pdf_sample_name
    pdf_sample_path.join('bill_a.pdf').to_s
  end

  # Filename (String) of the sample PDF that has junk before the PDF marker
  def junk_prefix_pdf_sample_name
    pdf_sample_path.join('junk_prefix.pdf').to_s
  end

  # Array of filenames of all *.pdf files in the personal samples directory
  def personal_pdf_sample_names
    Dir["#{File.dirname(__FILE__)}/../fixtures/personal_pdf_samples/*.pdf"]
  end

  # Pathname of the (optional) expectations file for the personal samples
  def personal_pdf_sample_expectations_path
    Pathname.new(File.dirname(__FILE__)).join('..','fixtures','personal_pdf_samples','expectations.yml')
  end

  # Returns the content of the expectations file: a Hash of
  # sample file name => Hash of expected attribute values.
  # Returns an empty Array when the file does not exist, is empty or cannot
  # be loaded; a load failure is reported on stderr instead of being hidden.
  def personal_pdf_sample_expectations
    path = personal_pdf_sample_expectations_path.to_s
    return [] unless File.file?(path)
    # The file uses Symbol keys and Date values, which the safe loader behind
    # YAML.load_file rejects on newer Psych versions. It is a local,
    # developer-maintained fixture, so the unrestricted loader is used
    # where it exists.
    loaded = if YAML.respond_to?(:unsafe_load_file)
      YAML.unsafe_load_file(path)
    else
      YAML.load_file(path)
    end
    # an empty document loads as false/nil
    loaded || []
  rescue StandardError => e
    warn "ignoring #{path}: #{e.class}: #{e.message}"
    []
  end
end