bankjob 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,400 @@
1
+
2
+ require 'rubygems'
3
+ require 'builder'
4
+ require 'digest/md5'
5
+ require 'bankjob.rb'
6
+
7
module Bankjob

  ##
  # A Transaction object represents a transaction in a bank account (a withdrawal, deposit,
  # transfer, etc) and is generally the result of running a Bankjob scraper.
  #
  # A Scraper will create Transactions while scraping web pages in an online banking site.
  # These Transactions will be collected in a Statement object which will then be written
  # to a file.
  #
  # A Transaction object knows how to write itself as a record in a CSV
  # (Comma Separated Values) file using +to_csv+ or as an XML element in an
  # OFX (Open Financial eXchange http://www.ofx.net) file using +to_ofx+
  #
  class Transaction

    # OFX transaction type for Generic credit
    CREDIT = "CREDIT"

    # OFX transaction type for Generic debit
    DEBIT = "DEBIT"

    # OFX transaction type for Interest earned or paid. (Depends on signage of amount)
    INT = "INT"

    # OFX transaction type for Dividend
    DIV = "DIV"

    # OFX transaction type for FI fee
    FEE = "FEE"

    # OFX transaction type for Service charge
    SRVCHG = "SRVCHG"

    # OFX transaction type for Deposit
    DEP = "DEP"

    # OFX transaction type for ATM debit or credit. (Depends on signage of amount)
    ATM = "ATM"

    # OFX transaction type for Point of sale debit or credit. (Depends on signage of amount)
    POS = "POS"

    # OFX transaction type for Transfer
    XFER = "XFER"

    # OFX transaction type for Check
    CHECK = "CHECK"

    # OFX transaction type for Electronic payment
    PAYMENT = "PAYMENT"

    # OFX transaction type for Cash withdrawal
    CASH = "CASH"

    # OFX transaction type for Direct deposit
    DIRECTDEP = "DIRECTDEP"

    # OFX transaction type for Merchant initiated debit
    DIRECTDEBIT = "DIRECTDEBIT"

    # OFX transaction type for Repeating payment/standing order
    REPEATPMT = "REPEATPMT"

    # OFX transaction type for Other
    OTHER = "OTHER"

    # OFX type of the transaction (credit, debit, atm withdrawal, etc)
    # Translates to the OFX element TRNTYPE and according to the OFX 2.0.3 schema this can be one of
    # * CREDIT
    # * DEBIT
    # * INT
    # * DIV
    # * FEE
    # * SRVCHG
    # * DEP
    # * ATM
    # * POS
    # * XFER
    # * CHECK
    # * PAYMENT
    # * CASH
    # * DIRECTDEP
    # * DIRECTDEBIT
    # * REPEATPMT
    # * OTHER
    attr_accessor :type

    # date of the transaction
    # Translates to OFX element DTPOSTED
    # (only the reader is generated here; the writer is the custom +date=+ below)
    attr_reader :date

    # the date the value affects the account (e.g. funds become available)
    # Documented as translating to OFX element DTUSER, although +to_ofx+
    # does not currently write it.
    # (only the reader is generated here; the writer is the custom +value_date=+ below)
    attr_reader :value_date

    # description of the transaction
    # This description is typically set by taking the raw description and
    # applying rules. If it is not set explicitly it returns the same
    # value as +raw_description+
    # Translates to OFX element MEMO
    # (only the writer is generated here; the reader is the custom +description+ below)
    attr_writer :description

    # the original format of the description as scraped from the bank site
    # This allows the raw information to be preserved when modifying the
    # +description+ with transaction rules (see Scraper#transaction_rule)
    # This does _not_ appear in the OFX output, only +description+ does.
    attr_accessor :raw_description

    # amount of the credit or debit (negative for debits)
    # Translates to OFX element TRNAMT
    attr_accessor :amount

    # account balance after the transaction
    # Not used in OFX but important for working out statement balances
    attr_accessor :new_balance

    # the generated unique id for this transaction in an OFX record
    # Translates to OFX element FITID this is generated if not set
    # (only the writer is generated here; the reader is the custom +ofx_id+ below)
    attr_writer :ofx_id

    # the payee of an expenditure (ie a debit or transfer)
    # This is of type Payee and translates to complex OFX element PAYEE
    attr_accessor :payee

    # the cheque number of a cheque transaction
    # Translates to OFX element CHECKNUM
    attr_accessor :check_number

    ##
    # Creates a new Transaction with empty attributes, a zero amount and
    # balance, an empty Payee and the type OTHER.
    #
    # +decimal+ is the decimal separator used when converting +amount+ and
    # +new_balance+ to Floats (see +real_amount+ and +real_new_balance+).
    #
    def initialize(decimal = ".")
      @ofx_id = nil
      @date = nil
      @value_date = nil
      @raw_description = nil
      @description = nil
      @amount = 0
      @new_balance = 0
      @decimal = decimal

      # Always create a Payee even if it doesn't get used - this ensures an empty
      # <PAYEE> element in the OFX output which is more correct and, for one thing,
      # stops Wesabe from adding UNKNOWN PAYEE to every transaction (even deposits)
      @payee = Payee.new()
      @check_number = nil
      @type = OTHER
    end

    ##
    # Sets the transaction date, passing the raw value through
    # Bankjob.create_date_time first.
    #
    def date=(raw_date_time)
      @date = Bankjob.create_date_time(raw_date_time)
    end

    ##
    # Sets the value date, passing the raw value through
    # Bankjob.create_date_time first.
    #
    def value_date=(raw_date_time)
      @value_date = Bankjob.create_date_time(raw_date_time)
    end

    ##
    # Creates a unique ID for the transaction for use in OFX documents, unless
    # one has already been set.
    # All OFX transactions need a unique identifier.
    #
    # Note that this is generated by creating an MD5 digest of the transaction
    # date, raw description, type, amount and new_balance. Which means that two
    # identical transactions will always produce the same +ofx_id+.
    # (This is important so that repeated scrapes of the same transaction value
    # produce identical ofx_id values)
    #
    # The generated value is cached on first access, so attributes changed
    # afterwards do not alter it.
    #
    def ofx_id()
      @ofx_id ||= Digest::MD5.hexdigest(
        "#{@date}:#{@raw_description}:#{@type}:#{@amount}:#{@new_balance}")
    end

    ##
    # Returns the description, defaulting to the +raw_description+ if no
    # specific description has been set by the user.
    #
    def description()
      @description.nil? ? raw_description : @description
    end

    ##
    # Returns the Transaction amount attribute as a ruby Float after
    # replacing the decimal separator with a . and stripping any other
    # separators.
    #
    # The transaction amount is typically a string and may hold commas for
    # 1000s or for decimal separators, making it unusable for mathematical
    # operations. The Float returned here can be used in operations like:
    #
    #   if (transaction.real_amount < 0)
    #     puts "It's a debit!"
    #   end
    #
    # The conversion uses the +decimal+ separator passed into the constructor
    # (defaults to "."). See Scraper#decimal
    #
    # This value is not used in OFX.
    #
    def real_amount()
      Bankjob.string_to_float(amount, @decimal)
    end

    ##
    # Returns the new balance after the transaction as a ruby Float after
    # replacing the decimal separator with a . and stripping any other
    # separators.
    #
    # Not used in OFX but important for working out statement balances
    # in calculations (see #real_amount)
    #
    def real_new_balance()
      Bankjob.string_to_float(new_balance, @decimal)
    end

    ##
    # Generates a string representing this Transaction as comma separated values
    # in the form:
    #
    # <tt>date, value_date, description, real_amount, real_new_balance, amount, new_balance, raw_description, ofx_id</tt>
    #
    def to_csv
      # if there's a payee, prepend their name to the description - otherwise skip it.
      # Interpolation (rather than String#+) keeps this working when no
      # description has been set yet and +description+ therefore returns nil.
      if payee.nil? || payee.name.nil?
        desc = description
      else
        desc = "#{payee.name} - #{description}"
      end
      [
        Bankjob.date_time_to_csv(date),
        Bankjob.date_time_to_csv(value_date),
        desc,
        real_amount,
        real_new_balance,
        amount,
        new_balance,
        raw_description,
        ofx_id
      ].to_csv
    end

    ##
    # Generates a string for use as a header in a CSV file for transactions.
    # The columns are, in order:
    #
    # <tt>Date, Value-Date, Description, Amount, New-Balance, Raw-Amount, Raw-New-Balance, Raw-Description, OFX-ID</tt>
    #
    def self.csv_header
      %w{ Date Value-Date Description Amount New-Balance Raw-Amount Raw-New-Balance Raw-Description OFX-ID }.to_csv
    end

    ##
    # Creates a new Transaction from an array that holds the values of one row
    # in a CSV file.
    #
    # +csv_row+ must hold an array of values in precisely this order:
    #
    # <tt>date, value_date, description, real_amount, real_new_balance, amount, new_balance, raw_description, ofx_id</tt>
    #
    # <em>(The format should be the same as that produced by +to_csv+)</em>
    #
    # +decimal+ is the decimal separator handed to the new Transaction
    # (defaults to ".", like the constructor).
    #
    # Raises a RuntimeError if +csv_row+ does not hold exactly 9 values.
    #
    def self.from_csv(csv_row, decimal = ".")
      if (csv_row.length != 9) # must have 9 cols
        csv_lines = csv_row.join("\n\t")
        msg = "Failed to create Transaction from csv row: \n\t#{csv_lines}\n"
        msg << " - 9 columns are required in the form: date, value_date, "
        msg << "description, real_amount, real_new_balance, amount, new_balance, "
        msg << "raw_description, ofx_id"
        raise msg
      end
      tx = Transaction.new(decimal)
      tx.date, tx.value_date, tx.description = csv_row[0..2]
      # skip real_amount and real_new_balance, they're read only and calculated
      tx.amount, tx.new_balance, tx.raw_description, tx.ofx_id = csv_row[5..8]
      return tx
    end

    ##
    # Generates an XML string adhering to the OFX standard
    # (see Open Financial Exchange http://www.ofx.net)
    # representing a single Transaction XML element.
    #
    # The OFX 2 schema defines a STMTTRN (StatementTransaction) as follows:
    #
    #   <xsd:complexType name="StatementTransaction">
    #     <xsd:annotation>
    #       <xsd:documentation>
    #         The OFX element "STMTTRN" is of type "StatementTransaction"
    #       </xsd:documentation>
    #     </xsd:annotation>
    #     <xsd:sequence>
    #       <xsd:element name="TRNTYPE" type="ofx:TransactionEnum"/>
    #       <xsd:element name="DTPOSTED" type="ofx:DateTimeType"/>
    #       <xsd:element name="DTUSER" type="ofx:DateTimeType" minOccurs="0"/>
    #       <xsd:element name="DTAVAIL" type="ofx:DateTimeType" minOccurs="0"/>
    #       <xsd:element name="TRNAMT" type="ofx:AmountType"/>
    #       <xsd:element name="FITID" type="ofx:FinancialInstitutionTransactionIdType"/>
    #       <xsd:sequence minOccurs="0">
    #         <xsd:element name="CORRECTFITID" type="ofx:FinancialInstitutionTransactionIdType"/>
    #         <xsd:element name="CORRECTACTION" type="ofx:CorrectiveActionEnum"/>
    #       </xsd:sequence>
    #       <xsd:element name="SRVRTID" type="ofx:ServerIdType" minOccurs="0"/>
    #       <xsd:element name="CHECKNUM" type="ofx:CheckNumberType" minOccurs="0"/>
    #       <xsd:element name="REFNUM" type="ofx:ReferenceNumberType" minOccurs="0"/>
    #       <xsd:element name="SIC" type="ofx:StandardIndustryCodeType" minOccurs="0"/>
    #       <xsd:element name="PAYEEID" type="ofx:PayeeIdType" minOccurs="0"/>
    #       <xsd:choice minOccurs="0">
    #         <xsd:element name="NAME" type="ofx:GenericNameType"/>
    #         <xsd:element name="PAYEE" type="ofx:Payee"/>
    #       </xsd:choice>
    #       <xsd:choice minOccurs="0">
    #         <xsd:element name="BANKACCTTO" type="ofx:BankAccount"/>
    #         <xsd:element name="CCACCTTO" type="ofx:CreditCardAccount"/>
    #       </xsd:choice>
    #       <xsd:element name="MEMO" type="ofx:MessageType" minOccurs="0"/>
    #       <xsd:choice minOccurs="0">
    #         <xsd:element name="CURRENCY" type="ofx:Currency"/>
    #         <xsd:element name="ORIGCURRENCY" type="ofx:Currency"/>
    #       </xsd:choice>
    #       <xsd:element name="INV401KSOURCE" type="ofx:Investment401kSourceEnum" minOccurs="0"/>
    #     </xsd:sequence>
    #   </xsd:complexType>
    #
    def to_ofx
      buf = ""
      # Set margin=5 to indent it nicely within the output from Statement.to_ofx
      x = Builder::XmlMarkup.new(:target => buf, :indent => 2, :margin => 5)
      x.STMTTRN { # transaction statement
        x.TRNTYPE type
        x.DTPOSTED Bankjob.date_time_to_ofx(date) # Date transaction was posted to account, [datetime] yyyymmdd or yyyymmddhhmmss
        x.TRNAMT amount # Amount of transaction [amount] can be , or . separated
        x.FITID ofx_id
        x.CHECKNUM check_number unless check_number.nil?
        # the payee renders its own element; append it straight to the shared buffer
        buf << payee.to_ofx unless payee.nil?
        #x.NAME description
        x.MEMO description
      }
      return buf
    end

    ##
    # Produces a string representation of the transaction
    #
    def to_s
      "#{self.class} - ofx_id: #{@ofx_id}, date:#{@date}, raw description: #{@raw_description}, type: #{@type} amount: #{@amount}, new balance: #{@new_balance}"
    end

    ##
    # Overrides == to allow comparison of Transaction objects so that they can
    # be merged in Statements. See Statement#merge
    #
    # Returns false (never nil) when +other+ is not a Transaction.
    #
    def ==(other) #:nodoc:
      return false unless other.kind_of?(Transaction)
      # sometimes the same date, when written and read back will not appear equal so convert to
      # a canonical string first
      Bankjob.date_time_to_ofx(@date) == Bankjob.date_time_to_ofx(other.date) &&
        # ignore value date - it may be updated between statements
        # (consider using ofx_id here later)
        @raw_description == other.raw_description &&
        @amount == other.amount &&
        @type == other.type &&
        @new_balance == other.new_balance
    end

    #
    # Overrides eql? so that array union will work when merging statements
    #
    def eql?(other) #:nodoc:
      self == other
    end

    ##
    # Overrides hash so that array union will work when merging statements.
    # Combines the same attributes that == compares (the value date is
    # deliberately left out).
    #
    def hash() #:nodoc:
      date_part = @date.nil? ? 0 : Bankjob.date_time_to_ofx(@date).hash
      desc_part = @raw_description.nil? ? 0 : @raw_description.hash
      type_part = @type.nil? ? 0 : @type.hash
      # don't use value date
      parts = [@amount.to_i, @new_balance.to_i, date_part, desc_part, type_part]
      parts.inject(1) { |result, part| 31 * result + part }
    end

  end # class Transaction
end # module
400
+
@@ -0,0 +1,133 @@
1
+ require 'rubygems'
2
+ require 'mechanize'
3
+ require 'hpricot'
4
+ require 'bankjob'
5
+
6
+ # Later versions of Mechanize no longer use Hpricot by default
7
+ # but have an attribute we can set to use it
8
# Only some Mechanize releases expose an html_parser= writer. Ask for it
# explicitly rather than rescuing NoMethodError, which would also hide any
# unrelated NoMethodError raised while the parser is being assigned.
if WWW::Mechanize.respond_to?(:html_parser=)
  WWW::Mechanize.html_parser = Hpricot
end

# Mix Bankjob into the top level so that its constants (Transaction, and
# presumably Scraper) can be referenced below without the Bankjob:: prefix.
include Bankjob
14
+
15
+ ##
16
+ # BaseScraper is a specific example of a Bankjob Scraper that can be used as a base
17
+ # class for scrapers that follow a typical pattern.
18
+ #
19
+ # In fact, it does not add much functionality and you could just as readily subclass
20
+ # the Scraper class as this class, but here is what it does add:
21
+ # *+scraper_args+ attribute holds the array of args specified by the -scraper_args command line option
22
+ # *+scrape_statement+ is implemented to use the --input command line option to specify a file for input
23
+ # so that you can save a web-page to a file for debugging
24
+ # *+scrape_statement+ instantiates a Mechanize agent and delegates to two other
25
+ # simple methods that must be overridden in a subclass.
26
+ #
27
+ # Specifically +scrape_statement+ passes the Mechanize agent to +fetch_transactions_page+
28
+ # then passes the resulting page to +parse_transactions_page+. Subclasses must implement these two methods.
29
+ # See the documentation for these methods for more details on how to implement them.
30
+ # Note that failure to override either method will result in an exception.
31
+ #
32
class BaseScraper < Scraper

  # +scraper_args+ holds the array of arguments specified on the command line with
  # the -scraper_args option. It is not used here, but it is set in the scrape_statement
  # method so that you can access it in your subclass.
  attr_accessor :scraper_args

  # This rule goes last and sets the type of any transactions
  # that are still set to OTHER to be the generic CREDIT or DEBIT
  # depending on the real amount of the transaction
  # +priority+ set to -999 to ensure it's last
  transaction_rule(-999) do |tx|
    if (tx.type == Transaction::OTHER)
      if tx.real_amount > 0
        tx.type = Transaction::CREDIT
      elsif tx.real_amount < 0
        tx.type = Transaction::DEBIT
      end
      # else leave it as OTHER if it's exactly zero
    end
  end

  ##
  # Override +fetch_transactions_page+ to use the mechanize +agent+ to
  # load the page holding your bank statement on your online banking website.
  # By using agent.get(url) to fetch the page, the returned page will be
  # an Hpricot document ready for parsing.
  #
  # Typically you will need to log-in using a form on a login page first.
  # Your implementation may look something like this:
  #
  #   # My online banking app has a logon page with a standard HTML form.
  #   # by looking at the source of the page I see that the form is named
  #   # 'MyLoginFormName' and the two text fields for user name and password
  #   # are called 'USERNAME' and 'PASSWORD' respectively.
  #   login_page = agent.get("http://mybankapp.com/login.html")
  #   form = login_page.forms.name('MyLoginFormName').first
  #   # Mechanize automatically makes constants for the form elements based on their names.
  #   form.USERNAME = "me"
  #   form.PASSWORD = "foo"
  #   agent.submit(form)
  #   sleep 3 #wait while the login takes effect
  #
  #   # Now that I've logged in and waited a bit, navigate to the page that lists
  #   # my recent transactions and return it
  #   return agent.get("http://mybankapp.com/latesttransactions.html")
  #
  # This base implementation always raises a RuntimeError.
  #
  def fetch_transactions_page(agent)
    raise "You must override fetch_transactions_page in your subclass of BaseScraper " +
          "or just subclass Scraper instead and override scrape_statement"
  end

  ##
  # Override +parse_transactions_page+ to take the Hpricot document passed in
  # as +page+, parse it using Hpricot directives, and create a Statement object
  # holding a set of Transaction objects for it.
  #
  # This base implementation always raises a RuntimeError.
  #
  def parse_transactions_page(page)
    raise "You must override parse_transactions_page in your subclass of BaseScraper " +
          "or just subclass Scraper instead and override scrape_statement"
  end

  ##
  # Implements the one essential method of a scraper +scrape_statement+
  # by calling +fetch_transactions_page+ to get a web page holding a bank
  # statement followed by a call to +parse_transactions_page+ that returns
  # the +Statement+ object.
  #
  # Do not override this method in a subclass. (If you want to override it
  # you should be subclassing Scraper instead of this class)
  #
  # If the --input argument has been used to specify an input html file to
  # use, this will be parsed directly instead of calling +fetch_transactions_page+.
  # This allows for easy debugging without slow web-scraping (simply view
  # the page in a regular browser and use Save Page As to save a local copy
  # of it, then specify this with the --input command-line arg)
  #
  # +args+ holds the array of arguments specified on the command line with
  # the -scraper_args option. It is not used here, but it is set on an
  # attribute called scraper_args and is thus accessible in your subclass.
  #
  # Raises a RuntimeError if no page could be loaded.
  #
  def scrape_statement(args)
    self.scraper_args = args
    if options.input.nil?
      # not debugging use the actual scraper
      # First create a mechanize agent: a sort of pretend web browser
      agent = WWW::Mechanize.new
      agent.user_agent_alias = 'Windows IE 6' # pretend that we're IE 6.0

      page = fetch_transactions_page(agent)
    else
      # used for debugging - load the page from a file instead of the web
      logger.debug("Reading debug input html from #{options.input} instead of scraping the real website.")
      # File.open with a block closes the handle once Hpricot has read it and,
      # unlike Kernel#open, never treats a leading "|" in the path as a command.
      page = File.open(options.input) { |file| Hpricot(file) }
    end
    raise "BaseScraper failed to load the transactions page" if page.nil?
    # Now that we've fetched the page, parse it to get a statement
    parse_transactions_page(page)
  end
end # BaseScraper
133
+