bankjob 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/PostInstall.txt +4 -0
- data/README.rdoc +77 -0
- data/bin/bankjob +10 -0
- data/lib/bankjob.rb +12 -0
- data/lib/bankjob/bankjob_runner.rb +184 -0
- data/lib/bankjob/cli.rb +258 -0
- data/lib/bankjob/payee.rb +114 -0
- data/lib/bankjob/scraper.rb +495 -0
- data/lib/bankjob/statement.rb +355 -0
- data/lib/bankjob/support.rb +217 -0
- data/lib/bankjob/transaction.rb +400 -0
- data/scrapers/base_scraper.rb +133 -0
- data/scrapers/bpi_scraper.rb +190 -0
- data/spec/bankjob_cli_spec.rb +15 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +10 -0
- data/spec/statement_spec.rb +121 -0
- data/spec/transaction_spec.rb +81 -0
- metadata +114 -0
@@ -0,0 +1,114 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'builder'
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
module Bankjob
|
7
|
+
|
8
|
+
##
|
9
|
+
# A Payee object represents an entity in a bank Transaction that receives a payment.
|
10
|
+
#
|
11
|
+
# A Scraper will create Payees while scraping web pages in an online banking site.
|
12
|
+
# In many cases Payees will not be distinguished in the online bank site in which case
|
13
|
+
# rules will have to be applied to separate the Payees
|
14
|
+
#
|
15
|
+
# A Payee object knows how to write itself as a record in a CSV
|
16
|
+
# (Comma Separated Values) file using +to_csv+ or as an XML element in an
|
17
|
+
# OFX (Open Financial eXchange http://www.ofx.net) file using +to_ofx+
|
18
|
+
#
|
19
|
+
class Payee

  # name of the payee
  # Translates to OFX element NAME
  attr_accessor :name

  # address of the payee
  # Translates to OFX element ADDR1
  #-- TODO Consider ADDR2,3
  attr_accessor :address

  # city in which the payee is located
  # Translates to OFX element CITY
  attr_accessor :city

  # state in which the payee is located
  # Translates to OFX element STATE
  attr_accessor :state

  # post code or zip in which the payee is located
  # Translates to OFX element POSTALCODE
  attr_accessor :postalcode

  # country in which the payee is located
  # Translates to OFX element COUNTRY
  attr_accessor :country

  # phone number of the payee
  # Translates to OFX element PHONE
  attr_accessor :phone

  ##
  # Generates a string representing this Payee as a single string for use
  # in a comma separated values column.
  #
  # Returns the +name+ attribute unchanged (which is +nil+ if no name
  # has been set).
  #
  def to_csv
    name
  end

  ##
  # Generates an XML string adhering to the OFX standard
  # (see Open Financial Exchange http://www.ofx.net)
  # representing a single Payee XML element.
  #
  # The schema for the OFX produced is
  #
  #   <xsd:complexType name="Payee">
  #     <xsd:annotation>
  #       <xsd:documentation>
  #         The OFX element "PAYEE" is of type "Payee"
  #       </xsd:documentation>
  #     </xsd:annotation>
  #     <xsd:sequence>
  #       <xsd:element name="NAME" type="ofx:GenericNameType"/>
  #       <xsd:sequence>
  #         <xsd:element name="ADDR1" type="ofx:AddressType"/>
  #         <xsd:sequence minOccurs="0">
  #           <xsd:element name="ADDR2" type="ofx:AddressType"/>
  #           <xsd:element name="ADDR3" type="ofx:AddressType" minOccurs="0"/>
  #         </xsd:sequence>
  #       </xsd:sequence>
  #       <xsd:element name="CITY" type="ofx:AddressType"/>
  #       <xsd:element name="STATE" type="ofx:StateType"/>
  #       <xsd:element name="POSTALCODE" type="ofx:ZipType"/>
  #       <xsd:element name="COUNTRY" type="ofx:CountryType" minOccurs="0"/>
  #       <xsd:element name="PHONE" type="ofx:PhoneType"/>
  #     </xsd:sequence>
  #   </xsd:complexType>
  #
  # Returns the String buffer that the XML builder wrote into.
  #
  def to_ofx
    buf = ""
    # Set margin=6 to indent it nicely within the output from Transaction.to_ofx
    x = Builder::XmlMarkup.new(:target => buf, :indent => 2, :margin => 6)
    x.PAYEE {
      x.NAME name
      x.ADDR1 address
      x.CITY city
      x.STATE state
      x.POSTALCODE postalcode
      x.COUNTRY country unless country.nil? # minOccurs="0" in schema (above)
      x.PHONE phone
    }
    return buf
  end

  ##
  # Produces the Payee as a row of comma separated values
  # (delegates to +to_csv+).
  #
  # Always returns a String: +to_csv+ yields +nil+ for a Payee without a
  # name, and a +to_s+ that returns +nil+ makes string interpolation fall
  # back to the default object representation, so the result is coerced.
  #
  def to_s
    to_csv.to_s
  end

end # class Payee
|
113
|
+
end # module
|
114
|
+
|
@@ -0,0 +1,495 @@
|
|
1
|
+
|
2
|
+
require 'rubygems'
|
3
|
+
require 'mechanize'
|
4
|
+
require 'logger'
|
5
|
+
require 'bankjob'
|
6
|
+
|
7
|
+
module Bankjob
|
8
|
+
|
9
|
+
##
|
10
|
+
# The Scraper class is the basis of all Bankjob web scrapers for scraping specific
|
11
|
+
# bank websites.
|
12
|
+
#
|
13
|
+
# To create your own scraper simply subclass Scraper and be sure to override
|
14
|
+
# the method +scrape_statement+ to perform the scraping and return a
|
15
|
+
# Bankjob::Statement object.
|
16
|
+
#
|
17
|
+
# Scraper provides some other optional methods to help you build Statements:
|
18
|
+
#
|
19
|
+
# +currency+:: use this class attribute to set the OFX currency at the top of
|
20
|
+
# your Scraper subclass definition. E.g.:
|
21
|
+
#
|
22
|
+
#
|
23
|
+
# class MyScraper < Scraper
|
24
|
+
# currency "USD"
|
25
|
+
# ...
|
26
|
+
#
|
27
|
+
# It defaults to "EUR" for euros.
|
28
|
+
#
|
29
|
+
# +decimal+:: use this class attribute to set the decimal separator at the top of
|
30
|
+
# your Scraper subclass definition. E.g.:
|
31
|
+
#
|
32
|
+
# class MyScraper < Scraper
|
33
|
+
# decimal ","
|
34
|
+
# ...
|
35
|
+
#
|
36
|
+
# It defaults to "." (period), the common alternative being "," (comma)
|
37
|
+
#
|
38
|
+
# Note that this should be set to the separator used in the +amount+
|
39
|
+
# attribute of the Transaction objects your Scraper creates. If, say,
|
40
|
+
# you deliberately scrape values like "12,34" and convert them to
|
41
|
+
# "12.34" before storing them in your Transaction, then leave the
|
42
|
+
# decimal as ".".
|
43
|
+
# If you choose to store the Transaction amount as "12,34",
|
44
|
+
# however, the +decimal+ setting becomes important when calling
|
45
|
+
# Transaction#real_amount to get the amount as a Float upon which
|
46
|
+
# calculations can be performed.
|
47
|
+
#
|
48
|
+
# +options+:: holds the command line options provided when Bankjob was launched.
|
49
|
+
# Use this attribute to get access to global options. For your scraper
|
50
|
+
# specific options use the array passed into +scrape_statement+ instead.
|
51
|
+
# (See #options below for more advice on how to use this)
|
52
|
+
#
|
53
|
+
# +logger+:: holds the logger initialized by Bankjob based on the command line
|
54
|
+
# options. Use this attribute to log information, warnings and debug messages
|
55
|
+
# from your scraper.
|
56
|
+
# (See #logger below for more advice on how to use this)
|
57
|
+
#
|
58
|
+
# +create_statement+:: creates a new empty Statement object with the appropriate
|
59
|
+
# default attributes (that is, the right currency)
|
60
|
+
# Use this in your Scraper to instantiate new Statement objects.
|
61
|
+
#
|
62
|
+
# +create_transaction+:: creates a new empty Transaction object with the appropriate
|
63
|
+
# default attributes (that is, the right decimal separator)
|
64
|
+
# Use this in your Scraper to instantiate new Transaction objects.
|
65
|
+
#
|
66
|
+
# +transaction_rule+:: registers a rule to be applied to all transactions after the
|
67
|
+
# statement has been scraped.
|
68
|
+
# Define as many of these as you need in your scraper to build better
|
69
|
+
# organized Transaction objects with clearer descriptions of the
|
70
|
+
# transaction, etc.
|
71
|
+
#
|
72
|
+
# Here is an example of a simple (but incomplete) scraper.
|
73
|
+
# Note that all of the scraping and parsing is in the +scrape_statement+ method, although
|
74
|
+
# a lot of the details of Hpricot parsing are left up to the imagination of the reader.
|
75
|
+
#
|
76
|
+
# When creating a scraper yourself look in the +scrapers+ directory of the bankjob gem
|
77
|
+
# to see some more useful examples.
|
78
|
+
#
|
79
|
+
# class AcmeBankScraper < Scraper
|
80
|
+
# #####
|
81
|
+
# # 1. Set up the Scraper properties for currency and separator
|
82
|
+
# # (this is optional)
|
83
|
+
#
|
84
|
+
# currency "EUR" # set the currency (EUR is the default anyway but just to demo..)
|
85
|
+
# decimal "," # set the decimal separator to comma instead of .
|
86
|
+
#
|
87
|
+
# #####
|
88
|
+
# # 2. Create some rules to post-process my transactions
|
89
|
+
# # (this is optional but is easier to maintain than manipulating
|
90
|
+
# # the values in the scraper itself)
|
91
|
+
#
|
92
|
+
# # rule to set negative transactions as debits
|
93
|
+
# transaction_rule do |tx|
|
94
|
+
# tx.type = "DEBIT" if (tx.real_amount < 0 and tx.type == "OTHER")
|
95
|
+
# end
|
96
|
+
#
|
97
|
+
# # General description parsing rule
|
98
|
+
# transaction_rule do |tx|
|
99
|
+
# case tx.description
|
100
|
+
# when /ATM/i
|
101
|
+
# tx.type = "ATM"
|
102
|
+
# when /ELEC PURCHASE/
|
103
|
+
# tx.description.gsub!(/ELEC PURCHASE \d+/, "spent with ATM card: ")
|
104
|
+
# end
|
105
|
+
# end
|
106
|
+
#
|
107
|
+
# #####
|
108
|
+
# # 3. Implement main engine of the scraper
|
109
|
+
# # (this is essential and where 99% of the work is)
|
110
|
+
#
|
111
|
+
# def scrape_statement(args)
|
112
|
+
#
|
113
|
+
# logger.debug("Reading debug input html from #{options.input} instead of scraping the real website.")
|
114
|
+
# agent = WWW::Mechanize.new
|
115
|
+
# agent.user_agent_alias = 'Windows IE 6' # pretend that we're IE 6.0
|
116
|
+
# # navigate to the login page
|
117
|
+
# login_page = agent.get("http://mybank.com/login")
|
118
|
+
# # find login form, fill it out and submit it
|
119
|
+
# form = login_page.forms.name('myBanksLoginForm').first
|
120
|
+
# # Mechanize creates constants like USERNAME for the form element it finds with that name
|
121
|
+
# form.USERNAME = args[0] # assuming -scraper_args "user password"
|
122
|
+
# form.PASSWORD = args[1]
|
123
|
+
# agent.submit(form)
|
124
|
+
# sleep 3 #wait while the login takes effect
|
125
|
+
#
|
126
|
+
# transactions_page = agent.get("http://mybank.com/transactions")
|
127
|
+
# statement = create_statement
|
128
|
+
#
|
129
|
+
# # ... go read the Hpricot documentation to work out how to get your transactions out of
|
130
|
+
# # the transactions_page and create a new transaction object for each one
|
131
|
+
# # We're going to gloss over that part here ....
|
132
|
+
#
|
133
|
+
# table = # use Hpricot to get the html table element assuming your transactions are in a table
|
134
|
+
# rows = (table/"tr[@valign=top]") # works for a table where the rows needed have the valign attr set to top
|
135
|
+
# rows.each do |row|
|
136
|
+
# transaction = create_transaction
|
137
|
+
# transaction.date = #... scrape a date here
|
138
|
+
# ...
|
139
|
+
# statement.transactions << transaction
|
140
|
+
# end
|
141
|
+
# end
|
142
|
+
# end
|
143
|
+
#
|
144
|
+
#--
|
145
|
+
# (Non RDOC comment) There are two parts to the Scraper class:
|
146
|
+
# - the public part which defines the
|
147
|
+
# method to be overridden in subclasses and provides utility methods and attributes;
|
148
|
+
# - the private internal part which handles the mechanics of registering a
|
149
|
+
# subclass as the scraper to be used, setting the currency and decimal attributes
|
150
|
+
# and registering transaction rules
|
151
|
+
#
|
152
|
+
#
|
153
|
+
class Scraper

  ##
  # Provides access to a logger instance created in the BankjobRunner which
  # subclasses can use for logging if they need to.
  #
  # To use this in your own scraper, use code like:
  #
  #   logger.debug("MyScraper is scraping the page at #{my_url}")
  #   logger.info("MyScraper fetched new statement from MyBank and has been sitting in my chair")
  #   logger.warn("MyScraper's been sitting in MY chair!")
  #   logger.fatal("MyScraper's been sitting in MY CHAIR and IT'S ALL BROKEN!")
  #
  attr_accessor :logger

  ##
  # Provides access to the command line options, which subclasses can use if
  # they need access to the global options used to launch Bankjob.
  #
  # To use this in your own scraper, use code like:
  #
  #   if (options.input?) then
  #     print "the input html file for debugging is #{options.input}"
  #   end
  #
  attr_accessor :options

  ##
  # Returns the decimal separator for this scraper.
  # This is typically set in the scraper class using the "decimal" directive.
  #
  # Falls back to "." (period) when the directive has never been used,
  # instead of raising a NameError on the unset class variable.
  #
  def decimal
    defined?(@@decimal) ? @@decimal : "."
  end

  ##
  # Returns the OFX currency for this scraper.
  # This is typically set in the scraper class using the "currency" directive.
  #
  # Falls back to "EUR" when the directive has never been used,
  # instead of raising a NameError on the unset class variable.
  #
  def currency
    defined?(@@currency) ? @@currency : "EUR"
  end

  ##
  # Sets the decimal separator for the money amounts used in the data fetched
  # by this scraper.
  # The scraper class can use this as a directive to set the separator so:
  #   decimal ","
  #
  # Defaults to period ".", but will typically need to be set as a comma in
  # european websites.
  #
  # Note: the value is stored in a class variable of Scraper, so it is
  # shared by Scraper and all of its subclasses.
  #
  def self.decimal(decimal)
    @@decimal = decimal
  end

  ##
  # Sets the OFX currency name for use in the OFX statements produced by
  # this scraper.
  #
  # The scraper class can use this as a directive to set the currency so:
  #   currency "USD"
  #
  # Defaults to EUR
  #
  def self.currency(currency)
    @@currency = currency
  end

  ##
  # Sets the account number for statements produced by this scraper.
  #
  # The scraper class can use this as a directive to set the number so:
  #   account_number "12345678"
  #
  # Must be a string from 1 to 22 chars in length
  #
  # This will be used by the create_statement method to set the account,
  # but the scraper may ignore this and simply construct its own statements
  # or change the number using the accessor: statement.account_number =
  # after constructing it.
  #
  # There is no default: when unset, create_statement passes +nil+.
  #
  def self.account_number(account_number)
    @@account_number = account_number
  end

  ##
  # Sets the account type for statements produced by this scraper.
  #
  # The scraper class can use this as a directive to set the type so:
  #   account_type Statement::SAVINGS
  #
  # Must be a string based on one of the constants in Statement
  #
  # This will be used by the create_statement method to set the account type,
  # but the scraper may ignore this and simply construct its own statements
  # or change the type using the accessor: statement.account_type =
  # after constructing it.
  #
  # When unset, create_statement leaves the Statement's own account type
  # untouched (documented upstream as Statement::CHECKING).
  #
  def self.account_type(account_type)
    @@account_type = account_type
  end

  ##
  # Sets the bank identifier for statements produced by this scraper.
  #
  # The scraper class can use this as a directive to set the number so:
  #   bank_id "12345678"
  #
  # Must be a string from 1 to 9 chars in length
  #
  # This will be used by the create_statement method to set the bank id,
  # but the scraper may ignore this and simply construct its own statements
  # or change the number using the accessor: statement.bank_id =
  # after constructing it.
  #
  # When unset, create_statement leaves the Statement's bank id untouched.
  #
  def self.bank_id(bank_id)
    @@bank_id = bank_id
  end

  ##
  # ScraperRule is a struct used for holding a rule body with its priority.
  # Users can create transaction rules in their Scraper subclasses using
  # the Scraper.transaction_rule method.
  ScraperRule = Struct.new(:priority, :rule_body)

  ##
  # Registers a rule that processes a transaction after it has been created,
  # to allow it to be manipulated into a more useful form for the client.
  #
  # For example, the transaction description might be simplified to remove certain
  # common strings, or the Payee details might be extracted from the description.
  #
  # Implementing this as a class method using a block permits the user to
  # implement transaction processing rules by calling this method several times
  # rather than implementing a single method (gives it a sort of DSL look)
  #
  # E.g.
  #   # This rule detects ATM withdrawals and modifies
  #   # the description and sets the type it uses
  #   transaction_rule do |tx|
  #     if (tx.real_amount < 0)
  #       if tx.raw_description =~ /WDR.*ATM\s+\d+\s+/i
  #         # $' holds whatever is after the pattern match - usually the ATM location
  #         tx.description = "ATM withdrawal at #{$'}"
  #         tx.type = Transaction::ATM
  #       end
  #     end
  #   end
  #
  # A transaction rule can optionally specify a +priority+ - any integer value.
  # The default priority is zero, with lower priority rules being executed last.
  #
  # The final order in which transaction rules will be executed is thus:
  # * rules with a higher priority value will be executed before rules with
  #   a lower priority no matter where they are declared
  # * rules of the same priority declared in the same class will be executed in
  #   the order in which they are declared - top rules first
  # * rules in parent classes are executed before rules in subclasses of the
  #   same priority.
  #
  # If you really want a rule to be fired last, and you want to allow for
  # subclasses to your scraper, use a negative priority like this:
  #
  #   transaction_rule(-999) do |tx|
  #     puts "I get executed last"
  #   end
  #
  # Returns the (shared) array of registered rules after insertion.
  #
  def self.transaction_rule(priority = 0, &rule_body)
    @@transaction_rules ||= []
    rule = ScraperRule.new(priority, rule_body)
    # The list is kept sorted by descending priority at insertion time,
    # with first-come-first-served order among equal priorities.
    # Given A:999, B:999, C:0, D:0, E:-999 and a new X:0, X belongs directly
    # after D, i.e. after the LAST rule whose priority is >= X's priority.
    #
    # The position is searched by predicate with rindex rather than by
    # looking a rule object up with Array#index: ScraperRule is a Struct
    # and compares by value, so two registrations of the same proc with the
    # same priority are ==, and index() would return the first of them and
    # misplace the new rule.
    last_higher_or_equal = @@transaction_rules.rindex { |r| r.priority.to_i >= priority }
    insert_at = last_higher_or_equal.nil? ? 0 : last_higher_or_equal + 1
    @@transaction_rules.insert(insert_at, rule)
  end

  ##
  # Runs through all of the rules registered with calls to +transaction_rule+
  # and applies them to each Transaction in the specified +statement+.
  # Each rule is applied to every transaction before the next rule runs.
  #
  # Bankjob calls this after +scrape_statement+ and before writing out the
  # statement to CSV or OFX
  #
  # Returns the same +statement+ object that was passed in.
  #
  def self.post_process_transactions(statement) #:nodoc:
    if defined?(@@transaction_rules)
      @@transaction_rules.each do |rule|
        statement.transactions.each do |transaction|
          rule.rule_body.call(transaction)
        end
      end
    end
    statement
  end

  ##
  # Scrapes a website to produce a new Statement object.
  #
  # This is the one method which a Scraper *must* implement by overriding
  # this method.
  #
  # Override this in your own Scraper to use Mechanize and Hpricot (or
  # some other mechanism if you prefer) to parse your bank website
  # and create a Bankjob::Statement object to hold the data.
  #
  # +args+ holds the scraper-specific arguments. It is optional here so
  # that existing no-argument calls keep working, while a call that passes
  # arguments reaches the explanatory error below rather than an
  # ArgumentError about arity.
  #
  # The implementation here will raise an error if not overridden.
  #
  def scrape_statement(args = nil)
    raise "You must override the instance method scrape_statement in your scraper!"
  end

  ##
  # Creates a new Statement.
  #
  # Calling this method is the preferred way of creating a new Statement object
  # since it sets the OFX currency (and possibly other attributes) based on the
  # values set in the definition of the Scraper subclass.
  # It is otherwise no different, however, than calling Statement.new() yourself.
  #
  # An unset account number is passed as +nil+ and an unset currency as
  # "EUR"; bank id and account type are only assigned when they were set.
  # NOTE(review): assumes Statement.new accepts a nil account number, to be
  # filled in later via statement.account_number= -- confirm in statement.rb.
  #
  def create_statement
    account_number = defined?(@@account_number) ? @@account_number : nil
    currency = defined?(@@currency) ? @@currency : "EUR"
    statement = Statement.new(account_number, currency)
    statement.bank_id = @@bank_id if defined?(@@bank_id)
    statement.account_type = @@account_type if defined?(@@account_type)
    statement
  end

  ##
  # Creates a new Transaction.
  #
  # Calling this method is the preferred way of creating a new Transaction object
  # since it sets the decimal separator (and possibly other attributes) based on the
  # values set in the definition of the Scraper subclass.
  #
  # It is otherwise no different, however, than calling Transaction.new() yourself.
  #
  # An unset decimal separator falls back to ".".
  #
  def create_transaction
    Transaction.new(defined?(@@decimal) ? @@decimal : ".")
  end

  ##
  # Private
  #
  # The internal workings of the Scraper come after this point - they
  # are not documented in RDOC
  ##

  #SCRAPER_INTERFACE is the list of methods that a scraper must define
  SCRAPER_INTERFACE = [:scrape_statement]

  # set up the directories in which user's scrapers will be sought
  HOME_DIR = File.dirname(__FILE__);
  SCRAPERS_DIR = File.join(HOME_DIR, "..", "..", "scrapers")

  ##
  # +inherited+ is always called when a class extends Scraper.
  # The subclass itself is passed in as +scraper_class+ allowing
  # us to register it to be instantiated later
  #
  def self.inherited(scraper_class) #:nodoc:
    # let any other inherited hooks further up the chain run as well
    super
    # verify that the scraper class indeed defines the necessary methods.
    # Note: Scraper itself defines a public scrape_statement, which the new
    # subclass inherits, and this hook runs before the subclass body is
    # evaluated - so this only fails if an ancestor hid the method.
    SCRAPER_INTERFACE.each do |method|
      if (not scraper_class.public_method_defined?(method))
        raise "Invalid scraper: the scraper class #{scraper_class.name} does not define the method #{method}"
      end
    end
    # in the future we might keep a registry of scrapers but for now
    # we assume there will always be one, and just register that class
    @@last_scraper_class = scraper_class
  end

  ##
  # This is the main method of the dynamic Scraper-loader: It loads
  # the actual scraper ruby file and initializes the class therein.
  #
  # Note that no assumption is made about the name of the class
  # defined within the specified +scraper_filename+. Rather, the
  # +self.inherited+ method will hold a reference to the last
  # class loaded that extends Bankjob::Scraper and that reference
  # is used here to initialize the class immediately after load()
  # is called on the specified file.
  #
  # A failure to load the file is logged, not re-raised; if a scraper class
  # was registered earlier, that class is still instantiated.
  #
  # Returns the new scraper instance with +logger+ and +options+ assigned.
  # Raises a RuntimeError if no scraper class has ever been registered.
  #
  def self.load_scraper(scraper_filename, options, logger) #:nodoc:
    # temporarily add the same dir as bankjob and the scrapers dir
    # to the ruby LOAD_PATH for finding the scraper
    begin
      $:.unshift(HOME_DIR)
      $:.unshift(SCRAPERS_DIR)
      logger.debug("About to load the scraper file named #{scraper_filename}")
      load(scraper_filename)
    rescue StandardError, ScriptError => e
      # ScriptError covers LoadError and SyntaxError raised by load().
      # Exception itself is deliberately not rescued so that Interrupt
      # (Ctrl-C) and SystemExit are not swallowed here.
      logger.error("Failed to load the scraper file #{scraper_filename} due to #{e.message}.\n\t#{e.backtrace[0]}")
    ensure
      $:.delete(SCRAPERS_DIR)
      $:.delete(HOME_DIR)
    end

    if (not defined?(@@last_scraper_class) or @@last_scraper_class.nil?)
      raise "Cannot initialize the scraper as none was loaded successfully."
    end

    logger.debug("About to instantiate scraper class: #{@@last_scraper_class.name}\n")
    scraper = @@last_scraper_class.new()
    scraper.logger = logger
    scraper.options = options
    scraper
  end # load_scraper
end # Scraper
|
495
|
+
end # module Bankjob
|