bankjob 0.5.1 → 0.5.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,11 @@
1
- == 0.5.1 2009-04-20
1
+ == 0.5.2 2009-05-18
2
+ * 3 minor enhancements:
3
+ * Added the ability to set fake timestamps on transactions, so that transactions occurring on the same day with a 00:00 time stamp will be in the correct order in Wesabe
4
+ To use this in your scraper, call statement#finish(true, true)
5
+ * bpi_scraper.rb now sets the account number on the statement after scraping it. It turns out this is important for uploading multiple accounts to Wesabe - the accounts get mixed if the account numbers are wrong
6
+ * the Statement#finish method also updates the balance and start and end dates of the statement after the last transaction is scraped. This used to be done automatically in the attribute getters but did not support statements with most-recent transaction last (only first).
7
+
8
+ === 0.5.1 2009-04-20
2
9
  * 1 minor enhancement:
3
10
  * bpi_scraper.rb now accepts an optional third argument for the account number. Entering a number will cause the scraper to scrape that account rather than the default account.
4
11
 
@@ -8,5 +8,5 @@ require 'bankjob/scraper.rb'
8
8
  require 'bankjob/payee.rb'
9
9
 
10
10
  module Bankjob
11
- BANKJOB_VERSION = '0.5.1' unless defined?(BANKJOB_VERSION)
11
+ BANKJOB_VERSION = '0.5.2' unless defined?(BANKJOB_VERSION)
12
12
  end
@@ -12,7 +12,7 @@ module Bankjob
12
12
  logger = options.logger
13
13
 
14
14
  if options.wesabe_help
15
- Bankjob.wesabe_help(options.wesabe_args)
15
+ Bankjob.wesabe_help(options.wesabe_args, logger)
16
16
  exit(0) # Wesabe help describes to the user how to use the wesabe options then quits
17
17
  end
18
18
 
@@ -181,4 +181,4 @@ NEWFILEUID:NONE
181
181
  return output_file
182
182
  end
183
183
  end # class BankjobRunner
184
- end # module Bankjob
184
+ end # module Bankjob
@@ -69,6 +69,13 @@ module Bankjob
69
69
  # organized Transaction objects with clearer descriptions of the
70
70
  # transaction, etc.
71
71
  #
72
+ # +finish+ :: finishes a statement by setting the balances and to and from dates
73
+ # based on the first and last transactions. Also, optionally, generates
74
+ # fake timestamps for transactions that have no time component in their
75
+ # dates. This is important for clients that use the timestamps to order
76
+ # the transactions correctly, and would otherwise mess up the order
77
+ # if all transactions on the same day were at the same time (E.g. Wesabe)
78
+ #
72
79
  # Here is an example of a simple (but incomplete) scraper.
73
80
  # Note that all of the scraping and parsing is in the +scrape_statement+ method, although
74
81
  # a lot of the details of Hpricot parsing are left up to the imagination of the reader.
@@ -67,6 +67,14 @@ module Bankjob
67
67
  # Use a constant to set this - defaults to CHECKING
68
68
  attr_accessor :account_type
69
69
 
70
+ # the last date of the period the statement covers
71
+ # Translates to the OFX element DTEND
72
+ attr_accessor :to_date
73
+
74
+ # the first date of the period the statement covers
75
+ # Translates to the OFX element DTSTART
76
+ attr_accessor :from_date
77
+
70
78
  ##
71
79
  # Creates a new empty Statement with no transactions.
72
80
  # The +account_number+ must be specified as a 1-22 character string.
@@ -77,6 +85,8 @@ module Bankjob
77
85
  @currency = currency
78
86
  @transactions = []
79
87
  @account_type = CHECKING
88
+ @closing_balance = nil
89
+ @closing_available = nil
80
90
  end
81
91
 
82
92
  ##
@@ -148,45 +158,6 @@ module Bankjob
148
158
  @transactions = merge_transactions(other)
149
159
  end
150
160
 
151
- ##
152
- # Returns the statement's start date.
153
- # The +from_date+ is taken from the date of the last transaction in the statement
154
- #
155
- def from_date()
156
- return nil if @transactions.empty?
157
- @transactions.last.date
158
- end
159
-
160
- ##
161
- # Returns the statement's end date.
162
- # The +to_date+ is taken from the date of the first transaction in the statement
163
- #
164
- def to_date()
165
- return nil if @transactions.empty?
166
- @transactions.first.date
167
- end
168
-
169
- ##
170
- # Returns the closing balance by looking at the
171
- # new balance of the first transaction.
172
- # If there are no transactions, +nil+ is returned.
173
- #
174
- def closing_balance()
175
- return nil if @closing_balance.nil? and @transactions.empty?
176
- @closing_balance ||= @transactions.first.new_balance
177
- end
178
-
179
- ##
180
- # Returns the closing available balance by looking at the
181
- # new balance of the first transaction.
182
- # If there are no transactions, +nil+ is returned.
183
- # Note that this is the same value returned as +closing_balance+.
184
- #
185
- def closing_available()
186
- return nil if @closing_available.nil? and @transactions.empty?
187
- @closing_available ||= @transactions.first.new_balance
188
- end
189
-
190
161
  ##
191
162
  # Generates a CSV (comma separated values) string with a single
192
163
  # row for each transaction.
@@ -342,6 +313,112 @@ module Bankjob
342
313
  }
343
314
  return buf
344
315
  end
316
+
317
+ ONE_MINUTE = 60
318
+ ELEVEN_59_PM = 23 * 60 * 60 + 59 * 60 # seconds at 23:59
319
+ MIDDAY = 12 * 60 * 60
320
+
321
+ ##
322
+ # Finishes the statement after scraping in two ways depending on the information
323
+ # that the scraper was able to obtain. Optionally have your scraper class call
324
+ # this after scraping is finished.
325
+ #
326
+ # This method:
327
+ #
328
+ # 1. Sets the closing balance and available_balance and the to_ and from_dates
329
+ # by using the first and last transactions in the list. Which transaction is
330
+ # used depends on whether +most_recent_first+ is true or false.
331
+ # The scraper may just set these directly in which case this may not be necessary.
332
+ #
333
+ # 2. If +fake_times+ is true time-stamps are invented and added to the transaction
334
+ # date attributes. This is useful if the website being scraped shows dates, but
335
+ # not times, but has transactions listed in chronological order.
336
+ # Without this process, the ofx generated has no proper indication of the order of
337
+ # transactions that occurred in the same day other than the order in the statement
338
+ # and this may be ignored by the client. (Specifically, Wesabe will reorder transactions
339
+ # in the same day if they all appear to occur at the same time).
340
+ #
341
+ # Note that the algorithm to set the fake times is a little tricky. Assuming
342
+ # the transactions are most-recent-first, the last transaction on each
343
+ # day is set at 11:59pm and each transaction prior to that is one minute earlier.
344
+ #
345
+ # But for the first transactions in the statement, the first is set at a few
346
+ # minutes after midnight, then we count backward. (The actual number of minutes
347
+ # is based on the number of transactions + 1 to be sure it doesn't pass midnight)
348
+ #
349
+ # This is crucial because transactions for a given day will often span 2 or more
350
+ # statements. By starting just after midnight and going back to just before midnight
351
+ # we reduce the chance of overlap.
352
+ #
353
+ # If the to-date is the same as the from-date for a statement, then we start at
354
+ # midday, so that prior and subsequent statements don't overlap.
355
+ #
356
+ # This simple algorithm basically guarantees no overlaps so long as:
357
+ # i. The number of transactions is small compared to the number of minutes in a day
358
+ # ii. A single day will not span more than 3 statements
359
+ #
360
+ # If the statement is most-recent-last (+most_recent_first = false+) the same
361
+ # algorithm is applied, only in reverse
362
+ #
363
+ def finish(most_recent_first, fake_times=false)
364
+ if !@transactions.empty? then
365
+ # if the user hasn't set the balances, set them to the first or last
366
+ # transaction balance depending on the order
367
+ if most_recent_first then
368
+ @closing_balance ||= transactions.first.new_balance
369
+ @closing_available ||= transactions.first.new_balance
370
+ @to_date ||= transactions.first.date
371
+ @from_date ||= transactions.last.date
372
+ else
373
+ @closing_balance ||= transactions.last.new_balance
374
+ @closing_available ||= transactions.last.new_balance
375
+ @to_date ||= transactions.last.date
376
+ @from_date ||= transactions.first.date
377
+ end
378
+
379
+ if fake_times and to_date.hour == 0 then
380
+ # the statement was unable to scrape times to go with the dates, but the
381
+ # client (say wesabe) will get the transaction order wrong if there are no
382
+ # times, so here we add times that order the transactions according to the
383
+ # order of the array of transactions
384
+
385
+ # the delta is 1 minute forward or backward fr
386
+ if to_date == from_date then
387
+ # all of the statement's transactions occur in the same day - to try to
388
+ # avoid overlap with subsequent or previous transactions we order them
389
+ # counting backward from midday
390
+ seconds = MIDDAY
391
+ else
392
+ seconds = (transactions.length + 1) * 60
393
+ end
394
+
395
+ if most_recent_first then
396
+ yday = transactions.first.date.yday
397
+ start = 0
398
+ delta = 1
399
+ finish = transactions.length
400
+ else
401
+ yday = transactions.last.date.yday
402
+ start = transactions.length - 1
403
+ finish = -1
404
+ delta = -1
405
+ end
406
+
407
+ i = start
408
+ until i == finish
409
+ tx = transactions[i]
410
+ if tx.date.yday != yday
411
+ # starting a new day, begin the countdown from 23:59 again
412
+ yday = tx.date.yday
413
+ seconds = ELEVEN_59_PM
414
+ end
415
+ tx.date += seconds unless tx.date.hour > 0
416
+ seconds -= ONE_MINUTE
417
+ i += delta
418
+ end
419
+ end
420
+ end
421
+ end
345
422
 
346
423
  def to_s
347
424
  buf = "#{self.class}: close_bal = #{closing_balance}, avail = #{closing_available}, curr = #{currency}, transactions:"
@@ -352,4 +429,4 @@ module Bankjob
352
429
  return buf
353
430
  end
354
431
  end # class Statement
355
- end # module
432
+ end # module
@@ -6,7 +6,7 @@ module Bankjob
6
6
 
7
7
  ##
8
8
  # Takes a date-time as a string or as a Time or DateTime object and returns
9
- # it as either a Time or a DateTime object.
9
+ # it as a Time object.
10
10
  #
11
11
  # This is useful in the setter method of a date attribute allowing the date
12
12
  # to be set as any type but stored internally as an object compatible with
@@ -14,8 +14,8 @@ module Bankjob
14
14
  # (Bankjob::Transaction uses this internally in the setter for +date+ for example
15
15
  #
16
16
  def self.create_date_time(date_time_raw)
17
- if (date_time_raw.respond_to?(:rfc822)) then
18
- # It's already a Time or DateTime
17
+ if (date_time_raw.is_a?(Time)) then
18
+ # It's already a Time
19
19
  return date_time_raw
20
20
  elsif (date_time_raw.to_s.strip.empty?)
21
21
  # Nil or non dates are returned as nil
@@ -167,7 +167,7 @@ module Bankjob
167
167
  # When used with Wesabe account and password, will log into Wesabe and list
168
168
  # the users accounts, and suggest command line args to upload to each account.
169
169
  #
170
- def self.wesabe_help(wesabe_args)
170
+ def self.wesabe_help(wesabe_args, logger)
171
171
  if (wesabe_args.nil? or wesabe_args.length != 2)
172
172
  puts <<-EOF
173
173
  Wesabe (http://www.wesabe.com) is an online bank account management tool (like Mint)
@@ -240,12 +240,15 @@ Troubleshooting:
240
240
  end
241
241
  end
242
242
  rescue Exception => e
243
- raise <<-EOF
243
+ msg =<<-EOF
244
244
  Failed to get Wesabe account information due to: #{e.message}.
245
245
  Check your username and password or use:
246
246
  bankjob --wesabe-help
247
247
  with no arguments for more details.
248
248
  EOF
249
+ logger.debug(msg)
250
+ logger.debug(e)
251
+ raise msg
249
252
  end
250
253
  end
251
254
  end # wesabe_help
@@ -1,205 +1,234 @@
1
-
2
- require 'rubygems'
3
- require 'bankjob' # this require will pull in all the classes we need
4
- require 'base_scraper' # this defines scraper that BpiScraper extends
5
-
6
- include Bankjob # access the namespace of Bankjob
7
-
8
- ##
9
- # BpiScraper is a scraper tailored to the BPI bank in Portugal (www.bpinet.pt).
10
- # It takes advantage of the BaseScraper to create the mechanize agent,
11
- # then followins the basic recipe there of first loading the tranasctions page
12
- # then parsing it.
13
- #
14
- # In addition to actually working for the BPI online banking, this class serves
15
- # as an example of how to build your own scraper.
16
- #
17
- # BpiScraper expects the user name and password to be passed on the command line
18
- # using --scraper-args "user password" (with a space between them).
19
- # Optionally, the account number can also be specified with the 3rd argument so:
20
- # --scraper-args "user password 803030000001" causing that account to be selected
21
- # before scraping the statement
22
- #
23
- class BpiScraper < BaseScraper
24
-
25
- currency "EUR" # Set the currency as euros
26
- decimal "," # BPI statements use commas as separators - this is used by the real_amount method
27
- account_number "1234567" # override this with a real accoun number
28
- account_type Statement::CHECKING # this is the default anyway
29
-
30
- # This rule detects ATM withdrawals and modifies
31
- # the description and sets the the type
32
- transaction_rule do |tx|
33
- if (tx.real_amount < 0)
34
- if tx.raw_description =~ /LEV.*ATM ELEC\s+\d+\/\d+\s+/i
35
- tx.description = "Multibanco withdrawal at #{$'}"
36
- tx.type = Transaction::ATM
37
- end
38
- end
39
- end
40
-
41
- # This rule detects checque payments and modifies the description
42
- # and sets the type
43
- transaction_rule do |tx|
44
- if tx.raw_description =~ /CHEQUE\s+(\d+)/i
45
- cheque_number = $+ # $+ holds the last group of the match which is (\d+)
46
- # change the description but append $' in case there was trailing text after the cheque no
47
- tx.description = "Cheque ##{cheque_number} withdrawn #{$'}"
48
- tx.type = Transaction::CHECK
49
- tx.check_number = cheque_number
50
- end
51
- end
52
-
53
- # This rule goes last and sets the description of transactions
54
- # that haven't had their description to the raw description after
55
- # changing the words to have capital letters only on the first word.
56
- # (Note that +description+ will default to being the same as +raw_description+
57
- # anyway - this rule is only for making the all uppercase output less ugly)
58
- # The payee is also fixed in this way
59
- transaction_rule(-999) do |tx|
60
- if (tx.description == tx.raw_description)
61
- tx.description = Bankjob.capitalize_words(tx.raw_description)
62
- end
63
- end
64
-
65
- # Some constants for the URLs and main elements in the BPI bank app
66
- LOGIN_URL = 'https://www.bpinet.pt/'
67
- TRANSACTIONS_URL = 'https://www.bpinet.pt/areaInf/consultas/Movimentos/Movimentos.asp'
68
-
69
- ##
70
- # Uses the mechanize web +agent+ to fetch the page holding the most recent
71
- # bank transactions and returns it.
72
- # This overrides (implements) +fetch_transactions_page+ in BaseScraper
73
- #
74
- def fetch_transactions_page(agent)
75
- login(agent)
76
- logger.info("Logged in, now navigating to transactions on #{TRANSACTIONS_URL}.")
77
- transactions_page = agent.get(TRANSACTIONS_URL)
78
- if (transactions_page.nil?)
79
- raise "BPI Scraper failed to load the transactions page at #{TRANSACTIONS_URL}"
80
- end
81
-
82
- # If there is a third scraper arg, it is the account number and we use it
83
- # to select the account on the transactions page
84
- if (scraper_args and scraper_args.length > 2)
85
- account = scraper_args[2]
86
- # the account selector is the field 'contaCorrente' in the form 'form_mov'
87
- Bankjob.select_and_submit(transactions_page, 'form_mov', 'contaCorrente', account)
88
- sleep 1
89
- # refetch the transactions page after selecting the account
90
- transactions_page = agent.get(TRANSACTIONS_URL)
91
- end
92
-
93
- return transactions_page
94
- end
95
-
96
-
97
- ##
98
- # Parses the BPI page listing about a weeks worth of transactions
99
- # and creates a Transaction for each one, putting them together
100
- # in a Statement.
101
- # Overrides (implements) +parse_transactions_page+ in BaseScraper.
102
- #
103
- def parse_transactions_page(transactions_page)
104
- begin
105
- statement = create_statement
106
-
107
- # Find the closing balance avaliable and accountable
108
- # Get from this:
109
- # <td valign="middle" width="135" ALIGN="left" class="TextoAzulBold">Saldo Disponível:</td>
110
- # <td valign="middle" width="110" ALIGN="right">1.751,31&nbsp;EUR</td>
111
- # to 1751,31
112
- available_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Dispon/ }.first.next_sibling
113
- statement.closing_available = available_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
114
- account_balance_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Contab/ }.first.next_sibling
115
- statement.closing_balance = account_balance_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
116
-
117
- transactions = []
118
-
119
- # find the first header with the CSS class "Laranja" as this will be the first
120
- # header in the transactions table
121
- header = (transactions_page/"td.Laranja").first
122
-
123
- # the table element is the grandparent element of this header (the row is the parent)
124
- table = header.parent.parent
125
-
126
- # each row with the valign attribute set to "top" holds a transaction
127
- rows = (table/"tr[@valign=top]")
128
- rows.each do |row|
129
- transaction = create_transaction # use the support method because it sets the separator
130
-
131
- # collect all of the table cells' inner html in an array (stripping leading/trailing spaces)
132
- data = (row/"td").collect{ |cell| cell.inner_html.strip }
133
-
134
- # the first (0th) column holds the date
135
- transaction.date = data[0]
136
-
137
- # the 2nd column holds the value date - but it's often empty
138
- # in which case we set it to nil
139
- vdate = data[1]
140
- if vdate.nil? or vdate.length == 0 or vdate.strip == "&nbsp;"
141
- transaction.value_date = nil
142
- else
143
- transaction.value_date = vdate
144
- end
145
-
146
- # the transaction raw_description is in the 3rd column
147
- transaction.raw_description = data[2]
148
-
149
- # the 4th column holds the transaction amount (with comma as decimal place)
150
- transaction.amount = data[3]
151
-
152
- # the new balance is in the last column
153
- transaction.new_balance=data[4]
154
-
155
- # add thew new transaction to the array
156
- transactions << transaction
157
- # break if $debug
158
- end
159
- rescue => exception
160
- msg = "Failed to parse the transactions page at due to exception: #{exception.message}\nCheck your user name and password."
161
- logger.fatal(msg);
162
- logger.debug(exception)
163
- logger.debug("Failed parsing transactions page:")
164
- logger.debug("--------------------------------")
165
- logger.debug(transactions_page) #.body
166
- logger.debug("--------------------------------")
167
- abort(msg)
168
- end
169
-
170
- # set the transactions on the statement
171
- statement.transactions = transactions
172
- return statement
173
- end
174
-
175
- ##
176
- # Logs into the BPI banking app by finding the form
177
- # setting the name and password and submitting it then
178
- # waits a bit.
179
- #
180
- def login(agent)
181
- logger.info("Logging in to #{LOGIN_URL}.")
182
- if (scraper_args)
183
- username, password = *scraper_args
184
- end
185
- raise "Login failed for BPI Scraper - pass user name and password using -scraper_args \"user <space> pass\"" unless (username and password)
186
-
187
- # navigate to the login page
188
- login_page = agent.get(LOGIN_URL)
189
-
190
- # find login form - it's called 'signOn' - fill it out and submit it
191
- form = login_page.form('signOn')
192
-
193
- # username and password are taken from the commandline args, set them
194
- # on USERID and PASSWORD which are the element names that the web page
195
- # form uses to identify the form fields
196
- form.USERID = username
197
- form.PASSWORD = password
198
-
199
- # submit the form - same as the user hitting the Login button
200
- agent.submit(form)
201
- sleep 3 # wait while the login takes effect
202
- end
203
- end # class BpiScraper
204
-
205
-
1
+
2
+ require 'rubygems'
3
+ require 'bankjob' # this require will pull in all the classes we need
4
+ require 'base_scraper' # this defines scraper that BpiScraper extends
5
+
6
+ include Bankjob # access the namespace of Bankjob
7
+
8
+ ##
9
+ # BpiScraper is a scraper tailored to the BPI bank in Portugal (www.bpinet.pt).
10
+ # It takes advantage of the BaseScraper to create the mechanize agent,
11
+ # then follows the basic recipe there of first loading the transactions page
12
+ # then parsing it.
13
+ #
14
+ # In addition to actually working for the BPI online banking, this class serves
15
+ # as an example of how to build your own scraper.
16
+ #
17
+ # BpiScraper expects the user name and password to be passed on the command line
18
+ # using --scraper-args "user password" (with a space between them).
19
+ # Optionally, the account number can also be specified with the 3rd argument so:
20
+ # --scraper-args "user password 803030000001" causing that account to be selected
21
+ # before scraping the statement
22
+ #
23
+ class BpiScraper < BaseScraper
24
+
25
+ currency "EUR" # Set the currency as euros
26
+ decimal "," # BPI statements use commas as separators - this is used by the real_amount method
27
+ account_number "1234567" # override this with a real account number
28
+ account_type Statement::CHECKING # this is the default anyway
29
+
30
+ # This rule detects ATM withdrawals and modifies
31
+ # the description and sets the type
32
+ transaction_rule do |tx|
33
+ if (tx.real_amount < 0)
34
+ if tx.raw_description =~ /LEV.*ATM ELEC\s+\d+\/\d+\s+/i
35
+ tx.description = "Multibanco withdrawal at #{$'}"
36
+ tx.type = Transaction::ATM
37
+ end
38
+ end
39
+ end
40
+
41
+ # This rule detects cheque payments and modifies the description
42
+ # and sets the type
43
+ transaction_rule do |tx|
44
+ if tx.raw_description =~ /CHEQUE\s+(\d+)/i
45
+ cheque_number = $+ # $+ holds the last group of the match which is (\d+)
46
+ # change the description but append $' in case there was trailing text after the cheque no
47
+ tx.description = "Cheque ##{cheque_number} withdrawn #{$'}"
48
+ tx.type = Transaction::CHECK
49
+ tx.check_number = cheque_number
50
+ end
51
+ end
52
+
53
+ # This rule goes last and sets the description of transactions
54
+ # that haven't had their description changed, to the raw description after
55
+ # changing the words to have capital letters only on the first word.
56
+ # (Note that +description+ will default to being the same as +raw_description+
57
+ # anyway - this rule is only for making the all uppercase output less ugly)
58
+ # The payee is also fixed in this way
59
+ transaction_rule(-999) do |tx|
60
+ if (tx.description == tx.raw_description)
61
+ tx.description = Bankjob.capitalize_words(tx.raw_description)
62
+ end
63
+ end
64
+
65
+ # Some constants for the URLs and main elements in the BPI bank app
66
+ LOGIN_URL = 'https://www.bpinet.pt/'
67
+ TRANSACTIONS_URL = 'https://www.bpinet.pt/areaInf/consultas/Movimentos/Movimentos.asp'
68
+
69
+ ##
70
+ # Uses the mechanize web +agent+ to fetch the page holding the most recent
71
+ # bank transactions and returns it.
72
+ # This overrides (implements) +fetch_transactions_page+ in BaseScraper
73
+ #
74
+ def fetch_transactions_page(agent)
75
+ login(agent)
76
+ logger.info("Logged in, now navigating to transactions on #{TRANSACTIONS_URL}.")
77
+ transactions_page = agent.get(TRANSACTIONS_URL)
78
+ if (transactions_page.nil?)
79
+ raise "BPI Scraper failed to load the transactions page at #{TRANSACTIONS_URL}"
80
+ end
81
+
82
+ # If there is a third scraper arg, it is the account number and we use it
83
+ # to select the account on the transactions page
84
+ if (scraper_args and scraper_args.length > 2)
85
+ account = scraper_args[2]
86
+ # the account selector is the field 'contaCorrente' in the form 'form_mov'
87
+ Bankjob.select_and_submit(transactions_page, 'form_mov', 'contaCorrente', account)
88
+ sleep 1
89
+ # refetch the transactions page after selecting the account
90
+ transactions_page = agent.get(TRANSACTIONS_URL)
91
+ end
92
+
93
+ return transactions_page
94
+ end
95
+
96
+
97
+ ##
98
+ # Parses the BPI page listing about a week's worth of transactions
99
+ # and creates a Transaction for each one, putting them together
100
+ # in a Statement.
101
+ # Overrides (implements) +parse_transactions_page+ in BaseScraper.
102
+ #
103
+ def parse_transactions_page(transactions_page)
104
+ begin
105
+ statement = create_statement
106
+
107
+ account_number = get_account_number(transactions_page)
108
+ statement.account_number = account_number unless account_number.nil?
109
+
110
+ # Find the closing balance available and accountable
111
+ # Get from this:
112
+ # <td valign="middle" width="135" ALIGN="left" class="TextoAzulBold">Saldo Disponível:</td>
113
+ # <td valign="middle" width="110" ALIGN="right">1.751,31&nbsp;EUR</td>
114
+ # to 1751,31
115
+ # Commenting out balances for now to let the balance be taken from the
116
+ # top-most transaction - this keeps balances in synch with actual transactions
117
+ # and allows for statements created for past dates (the balance at the top of the
118
+ # page is always the current one, not the one for the last transaction on that page)
119
+ #available_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Dispon/ }.first.next_sibling
120
+ #statement.closing_available = available_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
121
+ #account_balance_cell = (transactions_page/"td").select { |ele| ele.inner_text =~ /^Saldo Contab/ }.first.next_sibling
122
+ #statement.closing_balance = account_balance_cell.inner_text.scan(/[\d.,]+/)[0].gsub(/\./,"")
123
+
124
+ #transactions = []
125
+
126
+ # find the first header with the CSS class "Laranja" as this will be the first
127
+ # header in the transactions table
128
+ header = (transactions_page/"td.Laranja").first
129
+
130
+ # the table element is the grandparent element of this header (the row is the parent)
131
+ table = header.parent.parent
132
+
133
+ # each row with the valign attribute set to "top" holds a transaction
134
+ rows = (table/"tr[@valign=top]")
135
+ rows.each do |row|
136
+ transaction = create_transaction # use the support method because it sets the separator
137
+
138
+ # collect all of the table cells' inner html in an array (stripping leading/trailing spaces)
139
+ data = (row/"td").collect{ |cell| cell.inner_html.strip }
140
+
141
+ # the first (0th) column holds the date
142
+ transaction.date = data[0]
143
+
144
+ # the 2nd column holds the value date - but it's often empty
145
+ # in which case we set it to nil
146
+ vdate = data[1]
147
+ if vdate.nil? or vdate.length == 0 or vdate.strip == "&nbsp;"
148
+ transaction.value_date = nil
149
+ else
150
+ transaction.value_date = vdate
151
+ end
152
+
153
+ # the transaction raw_description is in the 3rd column
154
+ transaction.raw_description = data[2]
155
+
156
+ # the 4th column holds the transaction amount (with comma as decimal place)
157
+ transaction.amount = data[3]
158
+
159
+ # the new balance is in the last column
160
+ transaction.new_balance=data[4]
161
+
162
+ # add the new transaction to the statement
163
+ statement.add_transaction(transaction)
164
+ # break if $debug
165
+ end
166
+ rescue => exception
167
+ msg = "Failed to parse the transactions page at due to exception: #{exception.message}\nCheck your user name and password."
168
+ logger.fatal(msg);
169
+ logger.debug(exception)
170
+ logger.debug("Failed parsing transactions page:")
171
+ logger.debug("--------------------------------")
172
+ logger.debug(transactions_page) #.body
173
+ logger.debug("--------------------------------")
174
+ abort(msg)
175
+ end
176
+
177
+ # finish the statement to set the balances and dates
178
+ # and to fake the times since the bpi web pages
179
+ # don't hold the transaction times
180
+ statement.finish(true, true) # most_recent_first, fake_times
181
+
182
+ return statement
183
+ end
184
+
185
+ def get_account_number(transactions_page)
186
+ # make sure the page is a mechanize page, not hpricot
187
+ if transactions_page.kind_of?(Hpricot::Doc) then
188
+ page = WWW::Mechanize::Page.new(nil, {'content-type'=>'text/html'},
189
+ transactions_page.html, nil, nil)
190
+ else
191
+ page = transactions_page
192
+ end
193
+
194
+ # find the form for selecting an account -it's called 'form_mov'
195
+ form_mov = page.form('form_mov')
196
+ # the field for selecting the current account is in this form
197
+ account_selector = form_mov.field('contaCorrente')
198
+ # the selected account value is the account number but it has "|NR|" on the end so strip
199
+ # everything that's not a number
200
+ account_number = account_selector.value.gsub(/[^0-9]/,"")
201
+ return account_number
202
+ end
203
+
204
+ ##
205
+ # Logs into the BPI banking app by finding the form
206
+ # setting the name and password and submitting it then
207
+ # waits a bit.
208
+ #
209
+ def login(agent)
210
+ logger.info("Logging in to #{LOGIN_URL}.")
211
+ if (scraper_args)
212
+ username, password = *scraper_args
213
+ end
214
+ raise "Login failed for BPI Scraper - pass user name and password using -scraper_args \"user <space> pass\"" unless (username and password)
215
+
216
+ # navigate to the login page
217
+ login_page = agent.get(LOGIN_URL)
218
+
219
+ # find login form - it's called 'signOn' - fill it out and submit it
220
+ form = login_page.form('signOn')
221
+
222
+ # username and password are taken from the commandline args, set them
223
+ # on USERID and PASSWORD which are the element names that the web page
224
+ # form uses to identify the form fields
225
+ form.USERID = username
226
+ form.PASSWORD = password
227
+
228
+ # submit the form - same as the user hitting the Login button
229
+ agent.submit(form)
230
+ sleep 3 # wait while the login takes effect
231
+ end
232
+ end # class BpiScraper
233
+
234
+
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bankjob
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - rhubarb
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-04-20 00:00:00 +01:00
12
+ date: 2009-05-18 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency