bankjob 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -1
- data/lib/bankjob.rb +1 -1
- data/lib/bankjob/bankjob_runner.rb +2 -2
- data/lib/bankjob/scraper.rb +7 -0
- data/lib/bankjob/statement.rb +117 -40
- data/lib/bankjob/support.rb +8 -5
- data/scrapers/bpi_scraper.rb +234 -205
- metadata +2 -2
data/History.txt
CHANGED
@@ -1,4 +1,11 @@
|
|
1
|
-
== 0.5.
|
1
|
+
== 0.5.2 2009-05-18
|
2
|
+
* 3 minor enhancements:
|
3
|
+
* Added the ability to set fake timestamps on transactions so that transactions that have 00:00 for their time stamp that occur on the same day will be in the correct order in Wesabe
|
4
|
+
To use this in your scraper, call statement#finish(true, true)
|
5
|
+
* bpi_scraper.rb now sets the account number on the statement after scraping it. It turns out this is important for uploading multiple accounts to Wesabe - the accounts get mixed if the account numbers are wrong
|
6
|
+
* the Statement#finish method also updates the balance and start and end dates of the statement after the last transaction is scraped. This used to be done automatically in the attribute getters but did not support statements with most-recent transaction last (only first).
|
7
|
+
|
8
|
+
=== 0.5.1 2009-04-20
|
2
9
|
* 1 minor enhancement:
|
3
10
|
* bpi_scraper.rb now accepts an optional third argument for the account number. Entering a number will cause the scraper to scrape that account rather than the default account.
|
4
11
|
|
data/lib/bankjob.rb
CHANGED
@@ -12,7 +12,7 @@ module Bankjob
|
|
12
12
|
logger = options.logger
|
13
13
|
|
14
14
|
if options.wesabe_help
|
15
|
-
Bankjob.wesabe_help(options.wesabe_args)
|
15
|
+
Bankjob.wesabe_help(options.wesabe_args, logger)
|
16
16
|
exit(0) # Wesabe help describes to the user how to use the wesabe options then quits
|
17
17
|
end
|
18
18
|
|
@@ -181,4 +181,4 @@ NEWFILEUID:NONE
|
|
181
181
|
return output_file
|
182
182
|
end
|
183
183
|
end # class BankjobRunner
|
184
|
-
end # module Bankjob
|
184
|
+
end # module Bankjob
|
data/lib/bankjob/scraper.rb
CHANGED
@@ -69,6 +69,13 @@ module Bankjob
|
|
69
69
|
# organized Transaction objects with clearer descriptions of the
|
70
70
|
# transaction, etc.
|
71
71
|
#
|
72
|
+
# +finish+ :: finishes a statement by setting the balances and to and from dates
|
73
|
+
# based on the first and last transactions. Also, optionally, generates
|
74
|
+
# fake timestamps for transactions that have no time component in their
|
75
|
+
# dates. This is important for clients that use the timestamps to order
|
76
|
+
# the transactions correctly, and would otherwise mess up the order
|
77
|
+
# if all transactions on the same day were at the same time (E.g. Wesabe)
|
78
|
+
#
|
72
79
|
# Here is an example of a simple (but incomplete) scraper.
|
73
80
|
# Note that all of the scraping and parsing is in the +scrape_statement+ method, although
|
74
81
|
# a lot of the details of Hpricot parsing are left up to the imagination of the reader.
|
data/lib/bankjob/statement.rb
CHANGED
@@ -67,6 +67,14 @@ module Bankjob
|
|
67
67
|
# Use a constant to set this - defaults to CHECKING
|
68
68
|
attr_accessor :account_type
|
69
69
|
|
70
|
+
# the last date of the period the statement covers
|
71
|
+
# Translates to the OFX element DTEND
|
72
|
+
attr_accessor :to_date
|
73
|
+
|
74
|
+
# the first date of the period the statement covers
|
75
|
+
# Translates to the OFX element DTSTART
|
76
|
+
attr_accessor :from_date
|
77
|
+
|
70
78
|
##
|
71
79
|
# Creates a new empty Statement with no transactions.
|
72
80
|
# The +account_number+ must be specified as a 1-22 character string.
|
@@ -77,6 +85,8 @@ module Bankjob
|
|
77
85
|
@currency = currency
|
78
86
|
@transactions = []
|
79
87
|
@account_type = CHECKING
|
88
|
+
@closing_balance = nil
|
89
|
+
@closing_available = nil
|
80
90
|
end
|
81
91
|
|
82
92
|
##
|
@@ -148,45 +158,6 @@ module Bankjob
|
|
148
158
|
@transactions = merge_transactions(other)
|
149
159
|
end
|
150
160
|
|
151
|
-
##
|
152
|
-
# Returns the statement's start date.
|
153
|
-
# The +from_date+ is taken from the date of the last transaction in the statement
|
154
|
-
#
|
155
|
-
def from_date()
|
156
|
-
return nil if @transactions.empty?
|
157
|
-
@transactions.last.date
|
158
|
-
end
|
159
|
-
|
160
|
-
##
|
161
|
-
# Returns the statement's end date.
|
162
|
-
# The +to_date+ is taken from the date of the first transaction in the statement
|
163
|
-
#
|
164
|
-
def to_date()
|
165
|
-
return nil if @transactions.empty?
|
166
|
-
@transactions.first.date
|
167
|
-
end
|
168
|
-
|
169
|
-
##
|
170
|
-
# Returns the closing balance by looking at the
|
171
|
-
# new balance of the first transaction.
|
172
|
-
# If there are no transactions, +nil+ is returned.
|
173
|
-
#
|
174
|
-
def closing_balance()
|
175
|
-
return nil if @closing_balance.nil? and @transactions.empty?
|
176
|
-
@closing_balance ||= @transactions.first.new_balance
|
177
|
-
end
|
178
|
-
|
179
|
-
##
|
180
|
-
# Returns the closing available balance by looking at the
|
181
|
-
# new balance of the first transaction.
|
182
|
-
# If there are no transactions, +nil+ is returned.
|
183
|
-
# Note that this is the same value returned as +closing_balance+.
|
184
|
-
#
|
185
|
-
def closing_available()
|
186
|
-
return nil if @closing_available.nil? and @transactions.empty?
|
187
|
-
@closing_available ||= @transactions.first.new_balance
|
188
|
-
end
|
189
|
-
|
190
161
|
##
|
191
162
|
# Generates a CSV (comma separated values) string with a single
|
192
163
|
# row for each transaction.
|
@@ -342,6 +313,112 @@ module Bankjob
|
|
342
313
|
}
|
343
314
|
return buf
|
344
315
|
end
|
316
|
+
|
317
|
+
ONE_MINUTE = 60
|
318
|
+
ELEVEN_59_PM = 23 * 60 * 60 + 59 * 60 # seconds at 23:59
|
319
|
+
MIDDAY = 12 * 60 * 60
|
320
|
+
|
321
|
+
##
|
322
|
+
# Finishes the statement after scraping in two ways depending on the information
|
323
|
+
# that the scraper was able to obtain. Optionally have your scraper class call
|
324
|
+
# this after scraping is finished.
|
325
|
+
#
|
326
|
+
# This method:
|
327
|
+
#
|
328
|
+
# 1. Sets the closing balance and available_balance and the to_ and from_dates
|
329
|
+
# by using the first and last transactions in the list. Which transaction is
|
330
|
+
# used depends on whether +most_recent_first+ is true or false.
|
331
|
+
# The scraper may just set these directly in which case this may not be necessary.
|
332
|
+
#
|
333
|
+
# 2. If +fake_times+ is true time-stamps are invented and added to the transaction
|
334
|
+
# date attributes. This is useful if the website being scraped shows dates, but
|
335
|
+
# not times, but has transactions listed in chronological order.
|
336
|
+
# Without this process, the ofx generated has no proper indication of the order of
|
337
|
+
# transactions that occurred in the same day other than the order in the statement
|
338
|
+
# and this may be ignored by the client. (Specifically, Wesabe will reorder transactions
|
339
|
+
# in the same day if they all appear to occur at the same time).
|
340
|
+
#
|
341
|
+
# Note that the algorithm to set the fake times is a little tricky. Assuming
|
342
|
+
# the transactions are most-recent-first, the first listed (i.e. latest) transaction on each
|
343
|
+
# day is set at 11:59pm and each transaction prior to that is one minute earlier.
|
344
|
+
#
|
345
|
+
# But for the first transactions in the statement, the first is set at a few
|
346
|
+
# minutes after midnight, then we count backward. (The actual number of minutes
|
347
|
+
# is based on the number of transactions + 1 to be sure it doesn't pass midnight)
|
348
|
+
#
|
349
|
+
# This is crucial because transactions for a given day will often span 2 or more
|
350
|
+
# statements. By starting just after midnight and going back to just before midnight
|
351
|
+
# we reduce the chance of overlap.
|
352
|
+
#
|
353
|
+
# If the to-date is the same as the from-date for a statement, then we start at
|
354
|
+
# midday, so that prior and subsequent statements don't overlap.
|
355
|
+
#
|
356
|
+
# This simple algorithm basically guarantees no overlaps so long as:
|
357
|
+
# i. The number of transactions is small compared to the number of minutes in a day
|
358
|
+
# ii. A single day will not span more than 3 statements
|
359
|
+
#
|
360
|
+
# If the statement is most-recent-last (+most_recent_first = false+) the same
|
361
|
+
# algorithm is applied, only in reverse
|
362
|
+
#
|
363
|
+
def finish(most_recent_first, fake_times=false)
|
364
|
+
if !@transactions.empty? then
|
365
|
+
# if the user hasn't set the balances, set them to the first or last
|
366
|
+
# transaction balance depending on the order
|
367
|
+
if most_recent_first then
|
368
|
+
@closing_balance ||= transactions.first.new_balance
|
369
|
+
@closing_available ||= transactions.first.new_balance
|
370
|
+
@to_date ||= transactions.first.date
|
371
|
+
@from_date ||= transactions.last.date
|
372
|
+
else
|
373
|
+
@closing_balance ||= transactions.last.new_balance
|
374
|
+
@closing_available ||= transactions.last.new_balance
|
375
|
+
@to_date ||= transactions.last.date
|
376
|
+
@from_date ||= transactions.first.date
|
377
|
+
end
|
378
|
+
|
379
|
+
if fake_times and to_date.hour == 0 then
|
380
|
+
# the statement was unable to scrape times to go with the dates, but the
|
381
|
+
# client (say wesabe) will get the transaction order wrong if there are no
|
382
|
+
# times, so here we add times that order the transactions according to the
|
383
|
+
# order of the array of transactions
|
384
|
+
|
385
|
+
# the delta is 1 minute forward or backward fr
|
386
|
+
if to_date == from_date then
|
387
|
+
# all of the statement's transactions occur in the same day - to try to
|
388
|
+
# avoid overlap with subsequent or previous transactions we order them
|
389
|
+
# counting backward from midday (one minute per transaction)
|
390
|
+
seconds = MIDDAY
|
391
|
+
else
|
392
|
+
seconds = (transactions.length + 1) * 60
|
393
|
+
end
|
394
|
+
|
395
|
+
if most_recent_first then
|
396
|
+
yday = transactions.first.date.yday
|
397
|
+
start = 0
|
398
|
+
delta = 1
|
399
|
+
finish = transactions.length
|
400
|
+
else
|
401
|
+
yday = transactions.last.date.yday
|
402
|
+
start = transactions.length - 1
|
403
|
+
finish = -1
|
404
|
+
delta = -1
|
405
|
+
end
|
406
|
+
|
407
|
+
i = start
|
408
|
+
until i == finish
|
409
|
+
tx = transactions[i]
|
410
|
+
if tx.date.yday != yday
|
411
|
+
# starting a new day, begin the countdown from 23:59 again
|
412
|
+
yday = tx.date.yday
|
413
|
+
seconds = ELEVEN_59_PM
|
414
|
+
end
|
415
|
+
tx.date += seconds unless tx.date.hour > 0
|
416
|
+
seconds -= ONE_MINUTE
|
417
|
+
i += delta
|
418
|
+
end
|
419
|
+
end
|
420
|
+
end
|
421
|
+
end
|
345
422
|
|
346
423
|
def to_s
|
347
424
|
buf = "#{self.class}: close_bal = #{closing_balance}, avail = #{closing_available}, curr = #{currency}, transactions:"
|
@@ -352,4 +429,4 @@ module Bankjob
|
|
352
429
|
return buf
|
353
430
|
end
|
354
431
|
end # class Statement
|
355
|
-
end # module
|
432
|
+
end # module
|
data/lib/bankjob/support.rb
CHANGED
@@ -6,7 +6,7 @@ module Bankjob
|
|
6
6
|
|
7
7
|
##
|
8
8
|
# Takes a date-time as a string or as a Time or DateTime object and returns
|
9
|
-
# it as either a Time
|
9
|
+
# it as a Time object.
|
10
10
|
#
|
11
11
|
# This is useful in the setter method of a date attribute allowing the date
|
12
12
|
# to be set as any type but stored internally as an object compatible with
|
@@ -14,8 +14,8 @@ module Bankjob
|
|
14
14
|
# (Bankjob::Transaction uses this internally in the setter for +date+ for example)
|
15
15
|
#
|
16
16
|
def self.create_date_time(date_time_raw)
|
17
|
-
if (date_time_raw.
|
18
|
-
# It's already a Time
|
17
|
+
if (date_time_raw.is_a?(Time)) then
|
18
|
+
# It's already a Time
|
19
19
|
return date_time_raw
|
20
20
|
elsif (date_time_raw.to_s.strip.empty?)
|
21
21
|
# Nil or non dates are returned as nil
|
@@ -167,7 +167,7 @@ module Bankjob
|
|
167
167
|
# When used with Wesabe account and password, will log into Wesabe and list
|
168
168
|
# the user's accounts, and suggest command line args to upload to each account.
|
169
169
|
#
|
170
|
-
def self.wesabe_help(wesabe_args)
|
170
|
+
def self.wesabe_help(wesabe_args, logger)
|
171
171
|
if (wesabe_args.nil? or wesabe_args.length != 2)
|
172
172
|
puts <<-EOF
|
173
173
|
Wesabe (http://www.wesabe.com) is an online bank account management tool (like Mint)
|
@@ -240,12 +240,15 @@ Troubleshooting:
|
|
240
240
|
end
|
241
241
|
end
|
242
242
|
rescue Exception => e
|
243
|
-
|
243
|
+
msg =<<-EOF
|
244
244
|
Failed to get Wesabe account information due to: #{e.message}.
|
245
245
|
Check your username and password or use:
|
246
246
|
bankjob --wesabe-help
|
247
247
|
with no arguments for more details.
|
248
248
|
EOF
|
249
|
+
logger.debug(msg)
|
250
|
+
logger.debug(e)
|
251
|
+
raise msg
|
249
252
|
end
|
250
253
|
end
|
251
254
|
end # wesabe_help
|
data/scrapers/bpi_scraper.rb
CHANGED
@@ -1,205 +1,234 @@
|
|
1
|
-
|
2
|
-
require 'rubygems'
|
3
|
-
require 'bankjob' # this require will pull in all the classes we need
|
4
|
-
require 'base_scraper' # this defines scraper that BpiScraper extends
|
5
|
-
|
6
|
-
include Bankjob # access the namespace of Bankjob
|
7
|
-
|
8
|
-
##
|
9
|
-
# BpiScraper is a scraper tailored to the BPI bank in Portugal (www.bpinet.pt).
|
10
|
-
# It takes advantage of the BaseScraper to create the mechanize agent,
|
11
|
-
# then followins the basic recipe there of first loading the tranasctions page
|
12
|
-
# then parsing it.
|
13
|
-
#
|
14
|
-
# In addition to actually working for the BPI online banking, this class serves
|
15
|
-
# as an example of how to build your own scraper.
|
16
|
-
#
|
17
|
-
# BpiScraper expects the user name and password to be passed on the command line
|
18
|
-
# using --scraper-args "user password" (with a space between them).
|
19
|
-
# Optionally, the account number can also be specified with the 3rd argument so:
|
20
|
-
# --scraper-args "user password 803030000001" causing that account to be selected
|
21
|
-
# before scraping the statement
|
22
|
-
#
|
23
|
-
class BpiScraper < BaseScraper
|
24
|
-
|
25
|
-
currency "EUR" # Set the currency as euros
|
26
|
-
decimal "," # BPI statements use commas as separators - this is used by the real_amount method
|
27
|
-
account_number "1234567" # override this with a real
|
28
|
-
account_type Statement::CHECKING # this is the default anyway
|
29
|
-
|
30
|
-
# This rule detects ATM withdrawals and modifies
|
31
|
-
# the description and sets the the type
|
32
|
-
transaction_rule do |tx|
|
33
|
-
if (tx.real_amount < 0)
|
34
|
-
if tx.raw_description =~ /LEV.*ATM ELEC\s+\d+\/\d+\s+/i
|
35
|
-
tx.description = "Multibanco withdrawal at #{$'}"
|
36
|
-
tx.type = Transaction::ATM
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
# This rule detects checque payments and modifies the description
|
42
|
-
# and sets the type
|
43
|
-
transaction_rule do |tx|
|
44
|
-
if tx.raw_description =~ /CHEQUE\s+(\d+)/i
|
45
|
-
cheque_number = $+ # $+ holds the last group of the match which is (\d+)
|
46
|
-
# change the description but append $' in case there was trailing text after the cheque no
|
47
|
-
tx.description = "Cheque ##{cheque_number} withdrawn #{$'}"
|
48
|
-
tx.type = Transaction::CHECK
|
49
|
-
tx.check_number = cheque_number
|
50
|
-
end
|
51
|
-
end
|
52
|
-
|
53
|
-
# This rule goes last and sets the description of transactions
|
54
|
-
# that haven't had their description to the raw description after
|
55
|
-
# changing the words to have capital letters only on the first word.
|
56
|
-
# (Note that +description+ will default to being the same as +raw_description+
|
57
|
-
# anyway - this rule is only for making the all uppercase output less ugly)
|
58
|
-
# The payee is also fixed in this way
|
59
|
-
transaction_rule(-999) do |tx|
|
60
|
-
if (tx.description == tx.raw_description)
|
61
|
-
tx.description = Bankjob.capitalize_words(tx.raw_description)
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# Some constants for the URLs and main elements in the BPI bank app
|
66
|
-
LOGIN_URL = 'https://www.bpinet.pt/'
|
67
|
-
TRANSACTIONS_URL = 'https://www.bpinet.pt/areaInf/consultas/Movimentos/Movimentos.asp'
|
68
|
-
|
69
|
-
##
|
70
|
-
# Uses the mechanize web +agent+ to fetch the page holding the most recent
|
71
|
-
# bank transactions and returns it.
|
72
|
-
# This overrides (implements) +fetch_transactions_page+ in BaseScraper
|
73
|
-
#
|
74
|
-
def fetch_transactions_page(agent)
|
75
|
-
login(agent)
|
76
|
-
logger.info("Logged in, now navigating to transactions on #{TRANSACTIONS_URL}.")
|
77
|
-
transactions_page = agent.get(TRANSACTIONS_URL)
|
78
|
-
if (transactions_page.nil?)
|
79
|
-
raise "BPI Scraper failed to load the transactions page at #{TRANSACTIONS_URL}"
|
80
|
-
end
|
81
|
-
|
82
|
-
# If there is a third scraper arg, it is the account number and we use it
|
83
|
-
# to select the account on the transactions page
|
84
|
-
if (scraper_args and scraper_args.length > 2)
|
85
|
-
account = scraper_args[2]
|
86
|
-
# the account selector is the field 'contaCorrente' in the form 'form_mov'
|
87
|
-
Bankjob.select_and_submit(transactions_page, 'form_mov', 'contaCorrente', account)
|
88
|
-
sleep 1
|
89
|
-
# refetch the transactions page after selecting the account
|
90
|
-
transactions_page = agent.get(TRANSACTIONS_URL)
|
91
|
-
end
|
92
|
-
|
93
|
-
return transactions_page
|
94
|
-
end
|
95
|
-
|
96
|
-
|
97
|
-
##
|
98
|
-
# Parses the BPI page listing about a weeks worth of transactions
|
99
|
-
# and creates a Transaction for each one, putting them together
|
100
|
-
# in a Statement.
|
101
|
-
# Overrides (implements) +parse_transactions_page+ in BaseScraper.
|
102
|
-
#
|
103
|
-
def parse_transactions_page(transactions_page)
|
104
|
-
begin
|
105
|
-
statement = create_statement
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
#
|
111
|
-
#
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
#
|
120
|
-
#
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
#
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
#
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
transaction
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
#
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
#
|
200
|
-
|
201
|
-
|
202
|
-
end
|
203
|
-
|
204
|
-
|
205
|
-
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'bankjob' # this require will pull in all the classes we need
|
4
|
+
require 'base_scraper' # this defines scraper that BpiScraper extends
|
5
|
+
|
6
|
+
include Bankjob # access the namespace of Bankjob
|
7
|
+
|
8
|
+
##
|
9
|
+
# BpiScraper is a scraper tailored to the BPI bank in Portugal (www.bpinet.pt).
|
10
|
+
# It takes advantage of the BaseScraper to create the mechanize agent,
|
11
|
+
# then follows the basic recipe there of first loading the transactions page
|
12
|
+
# then parsing it.
|
13
|
+
#
|
14
|
+
# In addition to actually working for the BPI online banking, this class serves
|
15
|
+
# as an example of how to build your own scraper.
|
16
|
+
#
|
17
|
+
# BpiScraper expects the user name and password to be passed on the command line
|
18
|
+
# using --scraper-args "user password" (with a space between them).
|
19
|
+
# Optionally, the account number can also be specified with the 3rd argument so:
|
20
|
+
# --scraper-args "user password 803030000001" causing that account to be selected
|
21
|
+
# before scraping the statement
|
22
|
+
#
|
23
|
+
class BpiScraper < BaseScraper
|
24
|
+
|
25
|
+
currency "EUR" # Set the currency as euros
|
26
|
+
decimal "," # BPI statements use commas as separators - this is used by the real_amount method
|
27
|
+
account_number "1234567" # override this with a real account number
|
28
|
+
account_type Statement::CHECKING # this is the default anyway
|
29
|
+
|
30
|
+
# This rule detects ATM withdrawals and modifies
|
31
|
+
# the description and sets the type
|
32
|
+
transaction_rule do |tx|
|
33
|
+
if (tx.real_amount < 0)
|
34
|
+
if tx.raw_description =~ /LEV.*ATM ELEC\s+\d+\/\d+\s+/i
|
35
|
+
tx.description = "Multibanco withdrawal at #{$'}"
|
36
|
+
tx.type = Transaction::ATM
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
# This rule detects cheque payments and modifies the description
|
42
|
+
# and sets the type
|
43
|
+
transaction_rule do |tx|
|
44
|
+
if tx.raw_description =~ /CHEQUE\s+(\d+)/i
|
45
|
+
cheque_number = $+ # $+ holds the last group of the match which is (\d+)
|
46
|
+
# change the description but append $' in case there was trailing text after the cheque no
|
47
|
+
tx.description = "Cheque ##{cheque_number} withdrawn #{$'}"
|
48
|
+
tx.type = Transaction::CHECK
|
49
|
+
tx.check_number = cheque_number
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
# This rule goes last and sets the description of transactions
|
54
|
+
# that haven't had their description changed to the raw description after
|
55
|
+
# changing the words to have capital letters only on the first word.
|
56
|
+
# (Note that +description+ will default to being the same as +raw_description+
|
57
|
+
# anyway - this rule is only for making the all uppercase output less ugly)
|
58
|
+
# The payee is also fixed in this way
|
59
|
+
transaction_rule(-999) do |tx|
|
60
|
+
if (tx.description == tx.raw_description)
|
61
|
+
tx.description = Bankjob.capitalize_words(tx.raw_description)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Some constants for the URLs and main elements in the BPI bank app
|
66
|
+
LOGIN_URL = 'https://www.bpinet.pt/'
|
67
|
+
TRANSACTIONS_URL = 'https://www.bpinet.pt/areaInf/consultas/Movimentos/Movimentos.asp'
|
68
|
+
|
69
|
+
##
|
70
|
+
# Uses the mechanize web +agent+ to fetch the page holding the most recent
|
71
|
+
# bank transactions and returns it.
|
72
|
+
# This overrides (implements) +fetch_transactions_page+ in BaseScraper
|
73
|
+
#
|
74
|
+
def fetch_transactions_page(agent)
|
75
|
+
login(agent)
|
76
|
+
logger.info("Logged in, now navigating to transactions on #{TRANSACTIONS_URL}.")
|
77
|
+
transactions_page = agent.get(TRANSACTIONS_URL)
|
78
|
+
if (transactions_page.nil?)
|
79
|
+
raise "BPI Scraper failed to load the transactions page at #{TRANSACTIONS_URL}"
|
80
|
+
end
|
81
|
+
|
82
|
+
# If there is a third scraper arg, it is the account number and we use it
|
83
|
+
# to select the account on the transactions page
|
84
|
+
if (scraper_args and scraper_args.length > 2)
|
85
|
+
account = scraper_args[2]
|
86
|
+
# the account selector is the field 'contaCorrente' in the form 'form_mov'
|
87
|
+
Bankjob.select_and_submit(transactions_page, 'form_mov', 'contaCorrente', account)
|
88
|
+
sleep 1
|
89
|
+
# refetch the transactions page after selecting the account
|
90
|
+
transactions_page = agent.get(TRANSACTIONS_URL)
|
91
|
+
end
|
92
|
+
|
93
|
+
return transactions_page
|
94
|
+
end
|
95
|
+
|
96
|
+
|
97
|
+
##
|
98
|
+
# Parses the BPI page listing about a week's worth of transactions
|
99
|
+
# and creates a Transaction for each one, putting them together
|
100
|
+
# in a Statement.
|
101
|
+
# Overrides (implements) +parse_transactions_page+ in BaseScraper.
|
102
|
+
#
|
103
|
+
def parse_transactions_page(transactions_page)
|
104
|
+
begin
|
105
|
+
statement = create_statement
|
106
|
+
|
107
|
+
account_number = get_account_number(transactions_page)
|
108
|
+
statement.account_number = account_number unless account_number.nil?
|
109
|
+
|
110
|
+
# Find the closing balance available and accountable
|
111
|
+
# Get from this:
|
112
|
+
# <td valign="middle" width="135" ALIGN="left" class="TextoAzulBold">Saldo Disponível:</td>
|
113
|
+
# <td valign="middle" width="110" ALIGN="right">1.751,31 EUR</td>
|
114
|
+
# to 1751,31
|
115
|
+
# Commenting out balances for now to let the balance be taken from the
|
116
|
+
# top-most transaction - this keeps balances in synch with actual transactions
|
117
|
+
# and allows for statements created for past dates (the balance at the top of the
|
118
|
+
# page is always the current one, not the one for the last transaction on that page)
|
119
|
+
#available_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Dispon/ }.first.next_sibling
|
120
|
+
#statement.closing_available = available_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
|
121
|
+
#account_balance_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Contab/ }.first.next_sibling
|
122
|
+
#statement.closing_balance = account_balance_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
|
123
|
+
|
124
|
+
#transactions = []
|
125
|
+
|
126
|
+
# find the first header with the CSS class "Laranja" as this will be the first
|
127
|
+
# header in the transactions table
|
128
|
+
header = (transactions_page/"td.Laranja").first
|
129
|
+
|
130
|
+
# the table element is the grandparent element of this header (the row is the parent)
|
131
|
+
table = header.parent.parent
|
132
|
+
|
133
|
+
# each row with the valign attribute set to "top" holds a transaction
|
134
|
+
rows = (table/"tr[@valign=top]")
|
135
|
+
rows.each do |row|
|
136
|
+
transaction = create_transaction # use the support method because it sets the separator
|
137
|
+
|
138
|
+
# collect all of the table cells' inner html in an array (stripping leading/trailing spaces)
|
139
|
+
data = (row/"td").collect{ |cell| cell.inner_html.strip }
|
140
|
+
|
141
|
+
# the first (0th) column holds the date
|
142
|
+
transaction.date = data[0]
|
143
|
+
|
144
|
+
# the 2nd column holds the value date - but it's often empty
|
145
|
+
# in which case we set it to nil
|
146
|
+
vdate = data[1]
|
147
|
+
if vdate.nil? or vdate.length == 0 or vdate.strip == " "
|
148
|
+
transaction.value_date = nil
|
149
|
+
else
|
150
|
+
transaction.value_date = vdate
|
151
|
+
end
|
152
|
+
|
153
|
+
# the transaction raw_description is in the 3rd column
|
154
|
+
transaction.raw_description = data[2]
|
155
|
+
|
156
|
+
# the 4th column holds the transaction amount (with comma as decimal place)
|
157
|
+
transaction.amount = data[3]
|
158
|
+
|
159
|
+
# the new balance is in the last column
|
160
|
+
transaction.new_balance=data[4]
|
161
|
+
|
162
|
+
# add the new transaction to the statement
|
163
|
+
statement.add_transaction(transaction)
|
164
|
+
# break if $debug
|
165
|
+
end
|
166
|
+
rescue => exception
|
167
|
+
msg = "Failed to parse the transactions page at due to exception: #{exception.message}\nCheck your user name and password."
|
168
|
+
logger.fatal(msg);
|
169
|
+
logger.debug(exception)
|
170
|
+
logger.debug("Failed parsing transactions page:")
|
171
|
+
logger.debug("--------------------------------")
|
172
|
+
logger.debug(transactions_page) #.body
|
173
|
+
logger.debug("--------------------------------")
|
174
|
+
abort(msg)
|
175
|
+
end
|
176
|
+
|
177
|
+
# finish the statement to set the balances and dates
|
178
|
+
# and to fake the times since the bpi web pages
|
179
|
+
# don't hold the transaction times
|
180
|
+
statement.finish(true, true) # most_recent_first, fake_times
|
181
|
+
|
182
|
+
return statement
|
183
|
+
end
|
184
|
+
|
185
|
+
def get_account_number(transactions_page)
|
186
|
+
# make sure the page is a mechanize page, not hpricot
|
187
|
+
if transactions_page.kind_of?(Hpricot::Doc) then
|
188
|
+
page = WWW::Mechanize::Page.new(nil, {'content-type'=>'text/html'},
|
189
|
+
transactions_page.html, nil, nil)
|
190
|
+
else
|
191
|
+
page = transactions_page
|
192
|
+
end
|
193
|
+
|
194
|
+
# find the form for selecting an account - it's called 'form_mov'
|
195
|
+
form_mov = page.form('form_mov')
|
196
|
+
# the field for selecting the current account is in this form
|
197
|
+
account_selector = form_mov.field('contaCorrente')
|
198
|
+
# the selected account value is the account number but it has "|NR|" on the end so strip
|
199
|
+
# everything that's not a number
|
200
|
+
account_number = account_selector.value.gsub(/[^0-9]/,"")
|
201
|
+
return account_number
|
202
|
+
end
|
203
|
+
|
204
|
+
##
|
205
|
+
# Logs into the BPI banking app by finding the form
|
206
|
+
# setting the name and password and submitting it then
|
207
|
+
# waits a bit.
|
208
|
+
#
|
209
|
+
def login(agent)
|
210
|
+
logger.info("Logging in to #{LOGIN_URL}.")
|
211
|
+
if (scraper_args)
|
212
|
+
username, password = *scraper_args
|
213
|
+
end
|
214
|
+
raise "Login failed for BPI Scraper - pass user name and password using -scraper_args \"user <space> pass\"" unless (username and password)
|
215
|
+
|
216
|
+
# navigate to the login page
|
217
|
+
login_page = agent.get(LOGIN_URL)
|
218
|
+
|
219
|
+
# find login form - it's called 'signOn' - fill it out and submit it
|
220
|
+
form = login_page.form('signOn')
|
221
|
+
|
222
|
+
# username and password are taken from the commandline args, set them
|
223
|
+
# on USERID and PASSWORD which are the element names that the web page
|
224
|
+
# form uses to identify the form fields
|
225
|
+
form.USERID = username
|
226
|
+
form.PASSWORD = password
|
227
|
+
|
228
|
+
# submit the form - same as the user hitting the Login button
|
229
|
+
agent.submit(form)
|
230
|
+
sleep 3 # wait while the login takes effect
|
231
|
+
end
|
232
|
+
end # class BpiScraper
|
233
|
+
|
234
|
+
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bankjob
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rhubarb
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-05-18 00:00:00 +01:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|