bankjob 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,190 @@
1
+
2
+ require 'rubygems'
3
+ require 'bankjob' # this require will pull in all the classes we need
4
+ require 'base_scraper' # this defines scraper that BpiScraper extends
5
+
6
+ include Bankjob # access the namespace of Bankjob
7
+
##
# BpiScraper is a scraper tailored to the BPI bank in Portugal (www.bpinet.pt).
# It takes advantage of the BaseScraper to create the mechanize agent,
# then follows the basic recipe there of first loading the transactions page
# and then parsing it.
#
# In addition to actually working for the BPI online banking, this class serves
# as an example of how to build your own scraper.
#
# BpiScraper expects the user name and password to be passed on the command line
# using -scraper_args "user password" (with a space between them).
#
class BpiScraper < BaseScraper

  currency "EUR" # Set the currency as euros
  decimal "," # BPI statements use commas as separators - this is used by the real_amount method
  account_number "1234567" # override this with a real account number
  account_type Statement::CHECKING # this is the default anyway

  # This rule detects ATM withdrawals (negative amounts whose raw description
  # matches the ATM pattern), rewrites the description using the text that
  # follows the match, and sets the transaction type.
  transaction_rule do |tx|
    if tx.real_amount < 0 && tx.raw_description =~ /LEV.*ATM ELEC\s+\d+\/\d+\s+/i
      # post_match is whatever trails the matched ATM prefix
      tx.description = "Multibanco withdrawal at #{Regexp.last_match.post_match}"
      tx.type = Transaction::ATM
    end
  end

  # This rule detects cheque payments, rewrites the description
  # and sets the type and the cheque number.
  transaction_rule do |tx|
    if tx.raw_description =~ /CHEQUE\s+(\d+)/i
      match = Regexp.last_match
      cheque_number = match[1] # the only capture group, (\d+)
      # change the description but append the trailing text in case there
      # was anything after the cheque number
      tx.description = "Cheque ##{cheque_number} withdrawn #{match.post_match}"
      tx.type = Transaction::CHECK
      tx.check_number = cheque_number
    end
  end

  # This rule is registered with priority -999 and only touches transactions
  # whose description is still identical to the raw description: it replaces
  # the description with Bankjob.capitalize_words(raw_description).
  # (Note that +description+ will default to being the same as +raw_description+
  # anyway - this rule is only for making the all uppercase output less ugly.)
  transaction_rule(-999) do |tx|
    if tx.description == tx.raw_description
      tx.description = Bankjob.capitalize_words(tx.raw_description)
    end
  end

  # Some constants for the URLs and main elements in the BPI bank app
  LOGIN_URL = 'https://www.bpinet.pt/'
  TRANSACTIONS_URL = 'https://www.bpinet.pt/areaInf/consultas/Movimentos/Movimentos.asp'

  ##
  # Uses the mechanize web +agent+ to log in and then fetch the page holding
  # the most recent bank transactions, which is returned.
  # Raises a RuntimeError if the agent returns nil for the transactions page.
  # This overrides (implements) +fetch_transactions_page+ in BaseScraper.
  #
  def fetch_transactions_page(agent)
    login(agent)
    logger.info("Logged in, now navigating to transactions on #{TRANSACTIONS_URL}.")
    transactions_page = agent.get(TRANSACTIONS_URL)
    if transactions_page.nil?
      raise "BPI Scraper failed to load the transactions page at #{TRANSACTIONS_URL}"
    end
    transactions_page
  end

  ##
  # Parses the BPI page listing about a week's worth of transactions
  # and creates a Transaction for each one, putting them together
  # in a Statement, which is returned.
  # Any error raised while parsing is logged and the process is aborted.
  # Overrides (implements) +parse_transactions_page+ in BaseScraper.
  #
  def parse_transactions_page(transactions_page)
    begin
      statement = create_statement

      # Find the closing balances (available and accountable).
      # Get from this:
      #   <td valign="middle" width="135" ALIGN="left" class="TextoAzulBold">Saldo Disponível:</td>
      #   <td valign="middle" width="110" ALIGN="right">1.751,31&nbsp;EUR</td>
      # to 1751,31
      statement.closing_available = closing_amount(transactions_page, /^Saldo Dispon/)
      statement.closing_balance = closing_amount(transactions_page, /^Saldo Contab/)

      # find the first header with the CSS class "Laranja" as this will be the first
      # header in the transactions table
      header = (transactions_page/"td.Laranja").first

      # the table element is the grandparent element of this header (the row is the parent)
      table = header.parent.parent

      # each row with the valign attribute set to "top" holds a transaction
      transactions = (table/"tr[@valign=top]").map { |row| parse_transaction_row(row) }
    rescue => exception
      msg = "Failed to parse the transactions page at #{TRANSACTIONS_URL} due to exception: #{exception.message}\nCheck your user name and password."
      logger.fatal(msg)
      logger.debug(exception)
      logger.debug("Failed parsing transactions page:")
      logger.debug("--------------------------------")
      logger.debug(transactions_page) #.body
      logger.debug("--------------------------------")
      abort(msg)
    end

    # set the transactions on the statement
    statement.transactions = transactions
    statement
  end

  ##
  # Logs into the BPI banking app by finding the form,
  # setting the name and password and submitting it, then
  # waits a bit.
  # Raises a RuntimeError if the user name or password is missing from
  # +scraper_args+, or if the login page has no 'signOn' form.
  #
  def login(agent)
    logger.info("Logging in to #{LOGIN_URL}.")
    username, password = *scraper_args if scraper_args
    raise "Login failed for BPI Scraper - pass user name and password using -scraper_args \"user <space> pass\"" unless username && password

    # navigate to the login page
    login_page = agent.get(LOGIN_URL)

    # find login form - it's called 'signOn' - fill it out and submit it
    form = login_page.form('signOn')
    # fail with a readable message instead of a NoMethodError on nil below
    raise "Login failed for BPI Scraper - no 'signOn' form found on #{LOGIN_URL}" if form.nil?

    # username and password are taken from the commandline args, set them
    # on USERID and PASSWORD which are the element names that the web page
    # form uses to identify the form fields
    form.USERID = username
    form.PASSWORD = password

    # submit the form - same as the user hitting the Login button
    agent.submit(form)
    sleep 3 # wait while the login takes effect
  end

  private

  # Finds the first table cell whose text matches +label_pattern+ and returns
  # the first run of digits, dots and commas from the cell that follows it,
  # with the dots (thousands separators) removed.
  def closing_amount(page, label_pattern)
    label_cell = (page/"td").find { |cell| cell.inner_text =~ label_pattern }
    label_cell.next_sibling.inner_text.scan(/[\d.,]+/)[0].gsub(/\./, "")
  end

  # Builds one Transaction from a transactions-table row.
  def parse_transaction_row(row)
    transaction = create_transaction # use the support method because it sets the separator

    # collect all of the table cells' inner html in an array (stripping leading/trailing spaces)
    data = (row/"td").map { |cell| cell.inner_html.strip }

    # the first (0th) column holds the date
    transaction.date = data[0]

    # the 2nd column holds the value date - but it's often empty,
    # in which case we set it to nil
    vdate = data[1]
    vdate_missing = vdate.nil? || vdate.length == 0 || vdate.strip == "&nbsp;"
    transaction.value_date = vdate_missing ? nil : vdate

    # the transaction raw_description is in the 3rd column
    transaction.raw_description = data[2]

    # the 4th column holds the transaction amount (with comma as decimal place)
    transaction.amount = data[3]

    # the new balance is in the last column
    transaction.new_balance = data[4]

    transaction
  end
end # class BpiScraper
189
+
190
+
@@ -0,0 +1,15 @@
require 'stringio' # StringIO is used directly below to capture output
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
require 'bankjob/cli'

# Smoke test for the command line entry point: runs Bankjob::CLI.execute with
# no arguments, captures everything written to the supplied IO and checks
# that the generated-executable placeholder text is not part of the output.
describe Bankjob::CLI, "execute" do
  before(:each) do
    @stdout_io = StringIO.new
    Bankjob::CLI.execute(@stdout_io, [])
    @stdout_io.rewind
    @stdout = @stdout_io.read
  end

  it "should do something" do
    @stdout.should_not =~ /To update this executable/
  end
end
@@ -0,0 +1 @@
1
+ --colour
@@ -0,0 +1,10 @@
1
+ begin
2
+ require 'spec'
3
+ rescue LoadError
4
+ require 'rubygems'
5
+ gem 'rspec'
6
+ require 'spec'
7
+ end
8
+
9
+ $:.unshift(File.dirname(__FILE__) + '/../lib')
10
+ require 'bankjob.rb'
@@ -0,0 +1,121 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+ #require File.expand_path(File.dirname(__FILE__) + '/../lib/bankjob.rb')
3
+
4
+ include Bankjob
5
+
# Test the Statement merging in particular, plus the CSV round trip.
# Five transactions are built (all using "," as the decimal separator) and
# combined into statements that are consecutive, overlapping or
# non-contiguous with respect to each other.
describe Statement do
  before(:each) do

    @tx1 = Transaction.new(",")
    @tx1.date = "20080730000000"
    @tx1.value_date = "20080731145906"
    @tx1.raw_description = "1 Stamp duty 001"
    @tx1.amount = "-2,40"
    @tx1.new_balance = "1.087,43"

    @tx2 = Transaction.new(",")
    # NOTE(review): this date has 13 digits where the others have 14 - it looks
    # like a leading "2" is missing ("20080729000000"); confirm before changing.
    @tx2.date = "0080729000000"
    @tx2.value_date = "20080731145906"
    @tx2.raw_description = "2 Interest payment 001"
    @tx2.amount = "-59,94"
    @tx2.new_balance = "1.089,83"

    @tx3 = Transaction.new(",")
    @tx3.date = "20080208000000"
    @tx3.value_date = "20080731145906"
    @tx3.raw_description = "3 Load payment 001"
    @tx3.amount = "-256,13"
    @tx3.new_balance = "1.149,77"

    @tx4 = Transaction.new(",")
    @tx4.date = "20080207000000"
    @tx4.value_date = "20080731145906"
    @tx4.raw_description = "4 Transfer to bank 2"
    @tx4.amount = "-1.000,00"
    @tx4.new_balance = "1.405,90"

    @tx5 = Transaction.new(",")
    @tx5.date = "20080209000000"
    @tx5.value_date = "20080731145906"
    @tx5.raw_description = "5 Internet payment 838"
    @tx5.amount = "-32,07"
    @tx5.new_balance = "1.405,90"

    # the lot
    @s12345 = Statement.new
    @s12345.transactions = [@tx1.dup, @tx2.dup, @tx3.dup, @tx4.dup, @tx5.dup]

    # first 2
    @s12 = Statement.new
    @s12.transactions = [@tx1.dup, @tx2.dup]

    # middle 1
    @s3 = Statement.new
    @s3.transactions = [@tx3.dup]

    # last 2
    @s45 = Statement.new
    @s45.transactions = [@tx4.dup, @tx5.dup]

    # first 3
    @s123 = Statement.new
    @s123.transactions = [@tx1.dup, @tx2.dup, @tx3.dup]

    # last 4, overlaps with 23 of s123
    @s2345 = Statement.new
    @s2345.transactions = [@tx2.dup, @tx3.dup, @tx4.dup, @tx5.dup]

    # 2nd and last - overlaps non-contiguously with s123
    @s25 = Statement.new
    @s25.transactions = [@tx2.dup, @tx5.dup]

  end

  it "should merge consecutive satements properly" do
    @s123.merge(@s45).should == @s12345
  end

  it "should merge overlapping statments properly" do
    #@s123.merge(@s2345).transactions.each { |tx| print "#{tx.to_s}, "}
    @s123.merge(@s2345).should == @s12345
  end

  it "should merge a statement with a duplicate of itself without changing it" do
    @s123.merge(@s123.dup).should == @s123
  end

  it "should merge non-contiguous with an error" do
    # NOTE(review): this example asserts nothing - it only prints the merged
    # transactions. The expected outcome (an exception? a particular result?)
    # cannot be determined from this file; add an expectation once confirmed.
    m = @s123.merge(@s25)
    m.transactions.each { |tx| print "#{tx.to_s}, " }
  end

  it "should read back a satement from csv as it was written" do
    csv = @s123.to_csv
    statement = Statement.new
    statement.from_csv(csv, ",")
    statement.should == @s123
  end

  it "should read back and merge a statement with itself without change" do
    csv = @s123.to_csv
    statement = Statement.new
    statement.from_csv(csv, ",")
    m = @s123.merge(statement)
    m.should == @s123
  end

  it "should write, read, merge and write a statement without changing it" do
    csv = @s123.to_csv
    statement = Statement.new
    # read the csv back in - without this step the example merged with an
    # empty statement and never exercised the "read" part of its description
    statement.from_csv(csv, ",")
    m = @s123.merge(statement)
    m_csv = m.to_csv
    m_csv.should == csv
  end
end
121
+
@@ -0,0 +1,81 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ require 'bankjob.rb'
4
+ include Bankjob
5
+
# Checks Transaction identity and equality (ofx_id, ==, eql?, equal?, ===, hash)
# between a transaction, a field-for-field copy and a +dup+, plus the
# Bankjob.string_to_float conversion for both decimal separators.
describe Transaction do
  before(:each) do
    @tx1 = Transaction.new
    @tx1.date = "30-7-2008"
    @tx1.value_date = "20080731145906"
    @tx1.raw_description = "Some tax thing 10493"
    @tx1.amount = "-2,40"
    @tx1.new_balance = "1.087,43"

    # built separately with exactly the same field values as @tx1
    @tx1_copy = Transaction.new
    @tx1_copy.date = "30-7-2008"
    @tx1_copy.value_date = "20080731145906"
    @tx1_copy.raw_description = "Some tax thing 10493"
    @tx1_copy.amount = "-2,40"
    @tx1_copy.new_balance = "1.087,43"

    @tx1_dup = @tx1.dup

    @tx2 = Transaction.new
    # NOTE(review): 13 digits - a leading "2" may be missing ("20080729000000");
    # confirm before changing.
    @tx2.date = "0080729000000"
    @tx2.value_date = "20080731145906"
    @tx2.raw_description = "Interest payment"
    @tx2.amount = "-59,94"
    @tx2.new_balance = "1.089,83"
  end

  it "should generate the same ofx_id as its copy" do
    puts "tx1: #{@tx1.to_s}\n-----"
    # print the copy itself (this line used to print @tx1 a second time)
    puts "tx1_copy: #{@tx1_copy.to_s}"
    @tx1.ofx_id.should == @tx1_copy.ofx_id
    puts "#{@tx1.ofx_id} == #{@tx1_copy.ofx_id}"
  end

  it "should generate the same ofx_id as its duplicate" do
    @tx1.ofx_id.should == @tx1_dup.ofx_id
  end

  it "should be == to its duplicate" do
    @tx1.should == @tx1_dup
  end

  it "should be == to its identical copy" do
    @tx1.should == @tx1_copy
  end

  it "should not == a different transaction" do
    @tx1.should_not == @tx2
  end

  it "should be eql to its duplicate (necessary for merging)" do
    @tx1.should eql(@tx1_dup)
  end

  it "should not be equal to its duplicate" do
    # equal? is object identity, so a dup must never satisfy it
    @tx1.should_not equal(@tx1_dup)
  end

  it "should be === to its duplicate" do
    @tx1.should === @tx1_dup
  end

  it "should have the same hash as its duplicate" do
    @tx1.hash.should == @tx1_dup.hash
  end

  it "should convert 1,000,000.32 to 1000000.32 when decimal separator is ." do
    Bankjob.string_to_float("1,000,000.32", ".").should == 1000000.32
  end

  it "should convert 1.000.000,32 to 1000000.32 when decimal separator is ," do
    Bankjob.string_to_float("1.000.000,32", ",").should == 1000000.32
  end

end
81
+
metadata ADDED
@@ -0,0 +1,114 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bankjob
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.0
5
+ platform: ruby
6
+ authors:
7
+ - rhubarb
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-04-11 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0.6"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: mechanize
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.7.5
34
+ version:
35
+ - !ruby/object:Gem::Dependency
36
+ name: builder
37
+ type: :runtime
38
+ version_requirement:
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: 2.1.2
44
+ version:
45
+ - !ruby/object:Gem::Dependency
46
+ name: fastercsv
47
+ type: :runtime
48
+ version_requirement:
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.2.3
54
+ version:
55
+ description: Bankjob is a command-line ruby program for scraping online banking sites and producing statements in OFX (Open Financial Exchange) or CSV (Comma Separated Values) formats. Bankjob was created for people like me who want to get their bank data into a 3rd party application but whose bank does not support downloads in OFX format. It's also useful for keeping a permanent store of bank statements on your computer for reading in Excel (vs filing paper statements)
56
+ email:
57
+ - rhubarb.bankjob@gmail.com
58
+ executables:
59
+ - bankjob
60
+ extensions: []
61
+
62
+ extra_rdoc_files:
63
+ - History.txt
64
+ - PostInstall.txt
65
+ - README.rdoc
66
+ files:
67
+ - History.txt
68
+ - PostInstall.txt
69
+ - README.rdoc
70
+ - bin/bankjob
71
+ - lib/bankjob.rb
72
+ - lib/bankjob/bankjob_runner.rb
73
+ - lib/bankjob/cli.rb
74
+ - lib/bankjob/payee.rb
75
+ - lib/bankjob/scraper.rb
76
+ - lib/bankjob/statement.rb
77
+ - lib/bankjob/support.rb
78
+ - lib/bankjob/transaction.rb
79
+ - scrapers/base_scraper.rb
80
+ - scrapers/bpi_scraper.rb
81
+ - spec/bankjob_cli_spec.rb
82
+ - spec/spec.opts
83
+ - spec/spec_helper.rb
84
+ - spec/statement_spec.rb
85
+ - spec/transaction_spec.rb
86
+ has_rdoc: true
87
+ homepage: http://bankjob.rubyforge.org/
88
+ post_install_message: PostInstall.txt
89
+ rdoc_options:
90
+ - --main
91
+ - README.rdoc
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ">="
97
+ - !ruby/object:Gem::Version
98
+ version: "0"
99
+ version:
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: "0"
105
+ version:
106
+ requirements: []
107
+
108
+ rubyforge_project: bankjob
109
+ rubygems_version: 1.3.1
110
+ signing_key:
111
+ specification_version: 2
112
+ summary: Bankjob is a command-line ruby program for scraping online banking sites and producing statements in OFX (Open Financial Exchange) or CSV (Comma Separated Values) formats
113
+ test_files: []
114
+