parse_capitalone_pdf_statement 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5d442f232d7445dc288b32cd2aa6810d118f0a9d
4
+ data.tar.gz: 77a644f514d32bc3ab777c5733e32a92261a5f73
5
+ SHA512:
6
+ metadata.gz: 241704b7f7219fead5c464e9787ecde2d3af95501147efcf844a770a13013eb785aa3d143d5081b10122fe0f8006ed52b0398954e3d3e221d6a8adb793297036
7
+ data.tar.gz: 23dd29b08081e26b56d29163e5b1f3b56de8708e4e5691848fd845a1938c21082edd0ec98078dea078c0c9c82c1009b4f4bb5c0c169273646703681073a63dd8
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ parse_capitalone_pdf_statement
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.4
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ ---
2
+ language: ruby
3
+ script: COVERAGE=true bundle exec rake build
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ --no-private
2
+ -
3
+ LICENSE.txt
4
+ README.md
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in parse_capitalone_pdf_statement.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Joe Sortelli
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # parse_capitalone_pdf_statement
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/parse_capitalone_pdf_statement.svg)](http://badge.fury.io/rb/parse_capitalone_pdf_statement)
4
+ [![Build Status](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement.svg?branch=develop)](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement)
5
+ [![Dependency Status](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement.svg)](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement)
6
+
7
+
8
+ The Capital One website only provides a way to download structured
9
+ data of credit card transaction history for the previous 180 days.
10
+ However, you are able to download monthly PDF account statements
11
+ for the previous few years.
12
+
13
+ This library allows you to parse a Capital One PDF monthly statement,
14
+ and access structured transaction history data.
15
+
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem 'parse_capitalone_pdf_statement'
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ ```bash
27
+ % bundle
28
+ ```
29
+
30
+ Or install it yourself as:
31
+
32
+ ```bash
33
+ % gem install parse_capitalone_pdf_statement
34
+ ```
35
+
36
+ ## Convert PDF to JSON
37
+
38
+ Use the ```capitalone_pdf_to_json.rb``` script to convert a PDF
39
+ monthly statement to JSON.
40
+
41
+ ```bash
42
+ % capitalone_pdf_to_json.rb my_monthly_statement.pdf > my_monthly_statement.json
43
+ ```
44
+
45
+ ## API Example
46
+
47
+ Parse a PDF monthly statement and print all payments:
48
+
49
+ ```ruby
50
+ require 'parse_capitalone_pdf_statement'
51
+
52
+ statement = CapitalOneStatement.new('/path/to/my_monthly_statement.pdf')
53
+
54
+ statement.payments.each do |payment|
55
+ puts 'Transaction ID: %d' % payment.id
56
+ puts 'Date: %s' % payment.date
57
+ puts 'Description: %s' % payment.description
58
+ puts 'Amount: %.2f' % payment.amount
59
+ end
60
+ ```
61
+
62
+ See the [API
63
+ Documentation](http://sortelli.github.io/parse_capitalone_pdf_statement/frames.html#!CapitalOneStatement.html)
64
+ for more information.
65
+
66
+ ## License
67
+
68
+ Copyright (c) 2014 Joe Sortelli
69
+
70
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,27 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+ require 'yard'
4
+
5
# Define the YARD documentation task.
YARD::Rake::YardocTask.new

# Define the test task over every test/test_*.rb file.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/test_*.rb']
end

# The gem build task depends on the test task.
task :build => :test

# Execute the test task with COVERAGE=true in the environment
# (presumably read by the test helper to enable coverage -- confirm there).
task :coverage do
  ENV['COVERAGE'] = 'true'
  Rake::Task['test'].execute
end

desc "Create test pdf files"
task :make_test_pdf do
  # Shell pipeline template: text file in, PDF file out.
  command_template = 'enscript -B -f "Times-Roman6.0" %s --output=- | ps2pdf - > %s'
  data_dir         = File.join(File.dirname(__FILE__), 'test', 'data')

  Dir.chdir(data_dir) do
    ['test_statement', 'bad_statement1'].each do |basename|
      source_txt = basename + '.txt'
      target_pdf = basename + '.pdf'

      system(command_template % [source_txt, target_pdf])
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "parse_capitalone_pdf_statement"
5
+
6
# Exactly one argument is accepted: the path of the PDF statement to convert.
if ARGV.size != 1
  script_name = File.basename(__FILE__)
  $stderr.puts "usage: #{script_name} statement.pdf"
  exit 1
end

# Parse the statement and print it to stdout as pretty-printed JSON.
statement = CapitalOneStatement.new(ARGV.first)
puts JSON.pretty_generate(statement)
@@ -0,0 +1,261 @@
1
+ require 'parse_capitalone_pdf_statement/version'
2
+ require 'pdf-reader'
3
+ require 'json'
4
+ require 'date'
5
+
6
+ # CapitalOneStatement object, with data parsed from a PDF monthly statement.
7
+ #
8
+ # @!attribute start_date [r]
9
+ # @return [Date] the first day of the monthly statement
10
+ #
11
+ # @!attribute end_date [r]
12
+ # @return [Date] the final day of the monthly statement
13
+ #
14
+ # @!attribute previous_balance [r]
15
+ # @return [Float] the "Previous Balance" field listed in the statement
16
+ #
17
+ # @!attribute total_payments [r]
18
+ # @return [Float] the "Payments and Credits" field listed in the statement
19
+ #
20
+ # @!attribute total_fees [r]
21
+ # @return [Float] the "Fees and Interest Charged" field listed in the statement
22
+ #
23
+ # @!attribute total_transactions [r]
24
+ # @return [Float] the "Transactions" field listed in the statement
25
+ #
26
+ # @!attribute new_balance [r]
27
+ # @return [Float] the New Balance listed in the statement
28
+ #
29
+ # @!attribute payments [r]
30
+ # @return [Array<CapitalOneStatement::Transaction>] array of payment transactions
31
+ #
32
+ # @!attribute transactions [r]
33
+ # @return [Array<CapitalOneStatement::Transaction>] array of charge transactions
34
+ #
35
+ # @!attribute fees [r]
36
+ # @return [Array<CapitalOneStatement::Transaction>] array of fee transactions
37
+
38
class CapitalOneStatement
  # Billing cycle header, e.g. "Dec. 05 - Jan. 04, 2015".
  # Captures: start month, start day, end month, end day, end year.
  DATE_REGEX        = /(\w{3})\. (\d\d) - (\w{3})\. (\d\d), (\d{4})/
  # A dollar amount such as "$1,234.56"; a parenthesized amount is negative.
  AMOUNT_REGEX      = /\(?\$[\d,]+\.\d\d\)?/
  # A line holding nothing but a dollar amount.
  AMOUNT_ONLY_REGEX = /^ *#{AMOUNT_REGEX.source} *$/
  FEES_REGEX        = /Total Fees This Period +(#{AMOUNT_REGEX.source})/
  INTEREST_REGEX    = /Total Interest This Period +(#{AMOUNT_REGEX.source})/
  # A transaction row. Captures: id, day, month, description, amount.
  TRANSACTION_REGEX = /^ *(\d+) +(\d\d) ([A-Z][A-Z][A-Z]) (.+[^ ]) +(#{AMOUNT_REGEX.source}) *$/

  # Receiver handed to the page's +walk+ method. It claims to respond to
  # every callback, ignores all of them except those whose name contains
  # "show_text", and records the billing cycle when one of the shown strings
  # matches DATE_REGEX.
  BillingCycleWalker = Struct.new(:year, :offset, :start_date, :end_date) do
    # Accept any argument list so the standard two-argument form
    # (name, include_all) does not raise ArgumentError.
    def respond_to?(*)
      true
    end

    def method_missing(name, *args)
      return unless name =~ /show_text/

      # Use the first argument whose string form contains a billing cycle.
      match = nil
      args.find { |arg| match = CapitalOneStatement::DATE_REGEX.match(arg.to_s) }
      return unless match

      start_month, start_day, end_month, end_day, end_year = match.captures

      # A Dec -> Jan cycle starts in the year before the printed (end) year.
      self.offset     = (start_month.upcase == 'DEC' && end_month.upcase == 'JAN') ? 1 : 0
      self.year       = end_year.to_i
      self.start_date = Date.parse('%s-%s-%s' % [year - offset, start_month, start_day])
      self.end_date   = Date.parse('%s-%s-%s' % [year, end_month, end_day])
    end
  end
  private_constant :BillingCycleWalker

  attr_reader :start_date,
              :end_date,
              :previous_balance,
              :total_payments,
              :total_fees,
              :total_transactions,
              :new_balance,
              :payments,
              :transactions,
              :fees

  # Parses the PDF statement at +pdf_path+, sorts each transaction list by
  # id and verifies that each list sums to the total printed in the summary.
  #
  # @param pdf_path [String] path handed to PDF::Reader.new
  # @raise [RuntimeError] if the billing cycle dates or a summary total
  #   cannot be found, or if a calculated total differs from the summary
  def initialize(pdf_path)
    @dec_from_prev_year = nil
    @year               = nil
    @start_date         = nil
    @end_date           = nil
    @previous_balance   = nil
    @new_balance        = nil
    @total_payments     = nil
    @total_transactions = nil
    @total_fees         = nil
    @payments           = []
    @transactions       = []
    @fees               = []

    parse_from_pdf pdf_path

    @payments     = sorted_and_verified('payments',     @payments,     @total_payments)
    @transactions = sorted_and_verified('transactions', @transactions, @total_transactions)
    @fees         = sorted_and_verified('fees',         @fees,         @total_fees)
  end

  # @return [String] JSON representation of the statement
  def to_json(*args)
    {
      :start_date         => @start_date,
      :end_date           => @end_date,
      :previous_balance   => @previous_balance,
      :total_payments     => @total_payments,
      :total_fees         => @total_fees,
      :total_transactions => @total_transactions,
      :new_balance        => @new_balance,
      :payments           => @payments,
      :transactions       => @transactions,
      :fees               => @fees
    }.to_json(*args)
  end

  private

  # Sorts +trxs+ by id and checks their summed amount against +expected+.
  #
  # @return [Array<CapitalOneStatement::Transaction>] the sorted list
  def sorted_and_verified(type, trxs, expected)
    sorted = trxs.sort_by { |trx| trx.id }
    check_total(type, expected, sorted.inject(0) { |sum, trx| sum + trx.amount })
    sorted
  end

  # Reads every page of the PDF: looks for the billing cycle until one is
  # found, then feeds each text line (with its lookahead) to parse_pdf_line.
  def parse_from_pdf(pdf_path)
    PDF::Reader.new(pdf_path).pages.each_with_index do |page, page_num|
      find_billing_cycle(page) if @year.nil?

      each_line_with_lookahead(page.text) do |line, next_line|
        parse_pdf_line page_num, line, next_line
      end
    end
  end

  # Walks +page+ with a BillingCycleWalker and copies whatever it found
  # (possibly nothing, leaving @year nil) into this statement.
  def find_billing_cycle(page)
    walker = BillingCycleWalker.new
    page.walk walker

    @dec_from_prev_year = walker.offset == 1
    @year               = walker.year
    @start_date         = walker.start_date
    @end_date           = walker.end_date
  end

  # Yields each non-empty line of +text+ together with the non-empty line
  # that follows it (nil for the last line).
  def each_line_with_lookahead(text)
    lines = text.split("\n").reject { |line| line.empty? }

    lines.each_with_index do |line, index|
      yield line, lines[index + 1]
    end
  end

  # Extracts whatever a single text line contains: the summary totals, a
  # fee or interest charge, and up to two transactions (one per column).
  def parse_pdf_line(page_num, line, next_line)
    parse_summary(line) if @previous_balance.nil?

    add_fee line, FEES_REGEX,     "CAPITAL ONE MEMBER FEE"
    add_fee line, INTEREST_REGEX, "INTEREST CHARGE:PURCHASES"

    # The line is split into two character ranges, each of which may hold
    # one transaction; the character at index 79 belongs to neither.
    parsed = [(0..78), (80..-1)].map do |columns|
      text      = line.to_s[columns].to_s
      next_text = next_line.to_s[columns].to_s

      parse_transaction(repair_transaction_line(text, next_text))
    end

    # Non-negative amounts are charges, negative amounts are payments.
    charges, credits = parsed.compact.partition { |trx| trx.amount >= 0 }

    @transactions += charges
    @payments     += credits
  end

  # Treats a line holding exactly five amounts as the statement summary.
  def parse_summary(line)
    amounts = line.scan(AMOUNT_REGEX)
    return unless amounts.size == 5

    @previous_balance,
    @total_payments,
    @total_fees,
    @total_transactions,
    @new_balance = amounts.map { |amount| parse_amount(amount) }

    # Payment transactions are negative, so the expected total is negated
    # to make it comparable with their sum.
    @total_payments = -@total_payments
  end

  # Records a fee dated at the end of the billing cycle when +line+ matches
  # +regex+ with an amount other than "$0.00".
  def add_fee(line, regex, description)
    match = regex.match(line)
    return if match.nil? || match[1] == '$0.00'

    check_billing_cycle
    @fees << Transaction.new(
      @fees.size + 1,
      @end_date,
      description,
      match[1],
      parse_amount(match[1])
    )
  end

  # Re-joins a transaction whose amount was pushed onto the following line.
  #
  # @return [String] +line+, with the amount from +next_line+ appended when
  #   +next_line+ is only an amount and +line+ has none of its own
  def repair_transaction_line(line, next_line)
    return line unless next_line =~ AMOUNT_ONLY_REGEX
    return line if line =~ AMOUNT_REGEX

    "#{line} #{next_line.strip}"
  end

  # @return [CapitalOneStatement::Transaction, nil] the transaction found in
  #   +line+, or nil when the line is not a transaction row or is the member
  #   fee row (which is recorded by add_fee instead)
  # @raise [RuntimeError] if the billing cycle has not been determined yet
  def parse_transaction(line)
    match = TRANSACTION_REGEX.match(line)
    return nil if match.nil? || match[4] == "CAPITAL ONE MEMBER FEE"

    check_billing_cycle

    id, day, month, description, amount_str = match.captures

    # December rows of a Dec -> Jan statement belong to the previous year.
    year = (month.upcase == 'DEC' && @dec_from_prev_year) ? @year - 1 : @year
    date = Date.parse('%s-%s-%s' % [year, month, day])

    Transaction.new(id.to_i, date, description, amount_str, parse_amount(amount_str))
  end

  # @param amount [String] e.g. "$1,234.56" or "($10.00)"
  # @return [Float] the amount, negative when it starts with "("
  def parse_amount(amount)
    num = amount.gsub(/[^\d.]/, '').to_f
    amount.start_with?('(') ? -num : num
  end

  def check_billing_cycle
    raise "Failed to determine billing cycle dates" if @year.nil?
  end

  # Verifies that the summed transactions match the printed total, compared
  # at two decimal places.
  #
  # @raise [RuntimeError] if +expected+ was never found or the totals differ
  def check_total(type, expected, actual)
    # Without this guard a statement lacking a summary line would fail with
    # an opaque NoMethodError from nil.round.
    raise "Failed to find total %s in statement summary" % type if expected.nil?

    return if actual.round(2) == expected.round(2)

    raise "Calculated %s mismatch %.2f != %.2f" % [
      type,
      actual,
      expected
    ]
  end

  # CapitalOneStatement::Transaction represents a single statement entry:
  # a charge, a payment or a fee.
  class Transaction < Struct.new(
    :id,
    :date,
    :description,
    :amount_str,
    :amount
  )
    # @!attribute id
    #   @return [Fixnum] transaction id
    #
    # @!attribute date
    #   @return [Date] the date of the transaction
    #
    # @!attribute description
    #   @return [String] the description of the transaction
    #
    # @!attribute amount_str
    #   @return [String] the dollar amount string of the transaction
    #
    # @!attribute amount
    #   @return [Float] the dollar amount parsed into a Float, negative for payments

    # @return [String] JSON representation of Transaction
    def to_json(*args)
      to_h.to_json(*args)
    end
  end
end