parse_capitalone_pdf_statement 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +3 -0
- data/.yardopts +4 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +70 -0
- data/Rakefile +27 -0
- data/bin/capitalone_pdf_to_json.rb +13 -0
- data/lib/parse_capitalone_pdf_statement.rb +261 -0
- data/lib/parse_capitalone_pdf_statement/version.rb +3 -0
- data/parse_capitalone_pdf_statement.gemspec +35 -0
- data/test/data/bad_statement1.pdf +0 -0
- data/test/data/bad_statement1.txt +161 -0
- data/test/data/test_statement.json +610 -0
- data/test/data/test_statement.pdf +0 -0
- data/test/data/test_statement.txt +161 -0
- data/test/helper.rb +10 -0
- data/test/test_cli.rb +16 -0
- data/test/test_parser.rb +22 -0
- metadata +163 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5d442f232d7445dc288b32cd2aa6810d118f0a9d
|
4
|
+
data.tar.gz: 77a644f514d32bc3ab777c5733e32a92261a5f73
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 241704b7f7219fead5c464e9787ecde2d3af95501147efcf844a770a13013eb785aa3d143d5081b10122fe0f8006ed52b0398954e3d3e221d6a8adb793297036
|
7
|
+
data.tar.gz: 23dd29b08081e26b56d29163e5b1f3b56de8708e4e5691848fd845a1938c21082edd0ec98078dea078c0c9c82c1009b4f4bb5c0c169273646703681073a63dd8
|
data/.gitignore
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
parse_capitalone_pdf_statement
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.1.4
|
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Joe Sortelli
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
# parse_capitalone_pdf_statement
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/parse_capitalone_pdf_statement.svg)](http://badge.fury.io/rb/parse_capitalone_pdf_statement)
|
4
|
+
[![Build Status](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement.svg?branch=develop)](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement)
|
5
|
+
[![Dependency Status](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement.svg)](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement)
|
6
|
+
|
7
|
+
|
8
|
+
The Capital One website only provides a way to download structured
|
9
|
+
data of credit card transaction history for the previous 180 days.
|
10
|
+
However, you are able to download monthly PDF account statements
|
11
|
+
for the previous few years.
|
12
|
+
|
13
|
+
This library allows you to parse a Capital One PDF monthly statement,
|
14
|
+
and access structured transaction history data.
|
15
|
+
|
16
|
+
## Installation
|
17
|
+
|
18
|
+
Add this line to your application's Gemfile:
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
gem 'parse_capitalone_pdf_statement'
|
22
|
+
```
|
23
|
+
|
24
|
+
And then execute:
|
25
|
+
|
26
|
+
```bash
|
27
|
+
% bundle
|
28
|
+
```
|
29
|
+
|
30
|
+
Or install it yourself as:
|
31
|
+
|
32
|
+
```bash
|
33
|
+
% gem install parse_capitalone_pdf_statement
|
34
|
+
```
|
35
|
+
|
36
|
+
## Convert PDF to JSON
|
37
|
+
|
38
|
+
Use the ```capitalone_pdf_to_json.rb``` script to convert a PDF
|
39
|
+
monthly statement to JSON.
|
40
|
+
|
41
|
+
```bash
|
42
|
+
% capitalone_pdf_to_json.rb my_monthly_statement.pdf > my_monthly_statement.json
|
43
|
+
```
|
44
|
+
|
45
|
+
## API Example
|
46
|
+
|
47
|
+
Parse a PDF monthly statement and print all payments:
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
require 'parse_capitalone_pdf_statement'
|
51
|
+
|
52
|
+
statement = CapitalOneStatement.new('/path/to/my_monthly_statement.pdf')
|
53
|
+
|
54
|
+
statement.payments.each do |payment|
|
55
|
+
puts 'Transaction ID: %d' % payment.id
|
56
|
+
puts 'Date: %s' % payment.date
|
57
|
+
puts 'Description: %s' % payment.description
|
58
|
+
puts 'Amount: %.2f' % payment.amount
|
59
|
+
end
|
60
|
+
```
|
61
|
+
|
62
|
+
See the [API
|
63
|
+
Documentation](http://sortelli.github.io/parse_capitalone_pdf_statement/frames.html#!CapitalOneStatement.html)
|
64
|
+
for more information.
|
65
|
+
|
66
|
+
## License
|
67
|
+
|
68
|
+
Copyright (c) 2014 Joe Sortelli
|
69
|
+
|
70
|
+
MIT License
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'yard'

# Registers the YARD documentation task with its default settings.
YARD::Rake::YardocTask.new

# Registers the `test` task; it runs every file matching test/test_*.rb.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/test_*.rb']
end

# Building the gem always runs the test suite first.
task :build => :test

# Runs the test task with COVERAGE=true exported in the environment.
# NOTE(review): presumably test/helper.rb enables coverage reporting when
# this variable is set -- confirm against that file.
task :coverage do
  ENV['COVERAGE'] = 'true'
  # `execute` runs the task body directly, without its prerequisites.
  Rake::Task['test'].execute
end

desc "Create test pdf files"
task :make_test_pdf do
  # Shell pipeline template: text file -> PostScript (enscript) -> PDF (ps2pdf).
  # First %s is the input .txt file, second %s is the output .pdf file.
  text_to_pdf = 'enscript -B -f "Times-Roman6.0" %s --output=- | ps2pdf - > %s'

  Dir.chdir(File.join(File.dirname(__FILE__), 'test', 'data')) do
    %w{test_statement bad_statement1}.each do |name|
      # `sh` (unlike a bare `system` call) aborts the task when the command
      # exits with a non-zero status, so a failed conversion is not silently
      # ignored and stale fixtures are not mistaken for fresh ones.
      sh(text_to_pdf % [name + '.txt', name + '.pdf'])
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command line entry point: parses one Capital One PDF statement and prints
# its contents to standard output as pretty-printed JSON.

require "bundler/setup"
require "parse_capitalone_pdf_statement"

# Exactly one argument (the path of the PDF statement) is accepted; anything
# else prints a usage line on standard error and exits with status 1.
if ARGV.size != 1
  $stderr.puts "usage: #{File.basename(__FILE__)} statement.pdf"
  exit 1
end

statement = CapitalOneStatement.new(ARGV.first)

puts JSON.pretty_generate(statement)
|
@@ -0,0 +1,261 @@
|
|
1
|
+
require 'parse_capitalone_pdf_statement/version'
|
2
|
+
require 'pdf-reader'
|
3
|
+
require 'json'
|
4
|
+
require 'date'
|
5
|
+
|
6
|
+
# CapitalOneStatement object, with data parsed from a PDF monthly statement.
#
# All parsing happens in the constructor. After the PDF text has been read,
# the parsed payments, transactions and fees are summed and compared with the
# totals printed in the statement; a mismatch raises a RuntimeError.
#
# @!attribute start_date [r]
#   @return [Date] the first day of the monthly statement
#
# @!attribute end_date [r]
#   @return [Date] the final day of the monthly statement
#
# @!attribute previous_balance [r]
#   @return [Float] the "Previous Balance" field listed in the statement
#
# @!attribute total_payments [r]
#   @return [Float] the "Payments and Credits" field listed in the statement
#
# @!attribute total_fees [r]
#   @return [Float] the "Fees and Interest Charged" field listed in the statement
#
# @!attribute total_transactions [r]
#   @return [Float] the "Transactions" field listed in the statement
#
# @!attribute new_balance [r]
#   @return [Float] the New Balance listed in the statement
#
# @!attribute payments [r]
#   @return [Array<CapitalOneStatement::Transaction>] array of payment transactions
#
# @!attribute transactions [r]
#   @return [Array<CapitalOneStatement::Transaction>] array of charge transactions
#
# @!attribute fees [r]
#   @return [Array<CapitalOneStatement::Transaction>] array of fee transactions
class CapitalOneStatement
  # Billing cycle header, e.g. "Nov. 15 - Dec. 14, 2014".
  DATE_REGEX        = /(\w{3})\. (\d\d) - (\w{3})\. (\d\d), (\d{4})/
  # Dollar amount, optionally wrapped in parentheses, e.g. "$1,234.56" or "($50.00)".
  AMOUNT_REGEX      = /\(?\$[\d,]+\.\d\d\)?/
  # A line (or column slice) that contains nothing but a dollar amount.
  AMOUNT_ONLY_REGEX = /^ *#{AMOUNT_REGEX.source} *$/
  FEES_REGEX        = /Total Fees This Period +(#{AMOUNT_REGEX.source})/
  INTEREST_REGEX    = /Total Interest This Period +(#{AMOUNT_REGEX.source})/
  # Transaction row: id, day of month, upper-case month, description, amount.
  TRANSACTION_REGEX = /^ *(\d+) +(\d\d) ([A-Z][A-Z][A-Z]) (.+[^ ]) +(#{AMOUNT_REGEX.source}) *$/

  # Character ranges of the two text columns scanned for transaction rows
  # on every line (character 79 is not part of either column).
  COLUMN_RANGES = [(0..78), (80..-1)].freeze

  # CapitalOneStatement::Transaction represents a single credit transaction
  #
  # @!attribute id
  #   @return [Integer] transaction id
  #
  # @!attribute date
  #   @return [Date] the date of the transaction
  #
  # @!attribute description
  #   @return [String] the description of the transaction
  #
  # @!attribute amount_str
  #   @return [String] the dollar amount string of the transaction
  #
  # @!attribute amount
  #   @return [Float] the dollar amount parsed into a Float, negative for payments
  Transaction = Struct.new(:id, :date, :description, :amount_str, :amount) do
    # @return [String] JSON representation of Transaction
    def to_json(*args)
      to_h.to_json(*args)
    end
  end

  # Receiver handed to the PDF page walker. It claims to respond to every
  # callback, ignores all of them except those whose name contains
  # "show_text", and records the billing cycle whenever one of the callback
  # arguments contains a DATE_REGEX match. If several callbacks match, the
  # values of the last one are kept.
  class BillingCycleWalker
    attr_reader :year, :offset, :start_date, :end_date

    # Every callback name is accepted so the walker is offered all events.
    def respond_to_missing?(_name, _include_private = false)
      true
    end

    # @param name [Symbol] callback name sent by the page walker
    # @param args [Array] callback arguments; each is inspected via #to_s
    # @return [void]
    def method_missing(name, *args)
      return unless name.to_s.include?('show_text')

      match = nil
      args.find { |arg| match = DATE_REGEX.match(arg.to_s) }
      return unless match

      start_month, start_day, end_month, end_day, year_str = match.captures

      # A cycle running from December into January starts in the year
      # before the (single) year printed in the header.
      @offset     = (start_month.upcase == 'DEC' && end_month.upcase == 'JAN') ? 1 : 0
      @year       = year_str.to_i
      @start_date = Date.parse('%s-%s-%s' % [@year - @offset, start_month, start_day])
      @end_date   = Date.parse('%s-%s-%s' % [@year, end_month, end_day])
    end
  end
  private_constant :BillingCycleWalker

  attr_reader :start_date,
              :end_date,
              :previous_balance,
              :total_payments,
              :total_fees,
              :total_transactions,
              :new_balance,
              :payments,
              :transactions,
              :fees

  # Parses the statement at +pdf_path+ and validates the parsed totals.
  #
  # @param pdf_path [String] path of the PDF monthly statement
  # @raise [RuntimeError] if the billing cycle or the summary totals cannot
  #   be determined, or if the parsed entries do not add up to the totals
  #   printed in the statement
  def initialize(pdf_path)
    @dec_from_prev_year = nil
    @year               = nil
    @start_date         = nil
    @end_date           = nil
    @previous_balance   = nil
    @new_balance        = nil
    @total_payments     = nil
    @total_transactions = nil
    @total_fees         = nil
    @payments           = []
    @transactions       = []
    @fees               = []

    parse_from_pdf pdf_path

    @payments     = @payments.sort_by(&:id)
    @transactions = @transactions.sort_by(&:id)
    @fees         = @fees.sort_by(&:id)

    check_total 'payments',     @total_payments,     sum_amounts(@payments)
    check_total 'transactions', @total_transactions, sum_amounts(@transactions)
    check_total 'fees',         @total_fees,         sum_amounts(@fees)
  end

  # @return [String] JSON representation of the whole statement
  def to_json(*args)
    {
      :start_date         => @start_date,
      :end_date           => @end_date,
      :previous_balance   => @previous_balance,
      :total_payments     => @total_payments,
      :total_fees         => @total_fees,
      :total_transactions => @total_transactions,
      :new_balance        => @new_balance,
      :payments           => @payments,
      :transactions       => @transactions,
      :fees               => @fees
    }.to_json(*args)
  end

  private

  # Reads every page of the PDF, detecting the billing cycle on the first
  # page that contains it and feeding each text line to #parse_pdf_line
  # together with the following non-empty line.
  def parse_from_pdf(pdf_path)
    PDF::Reader.new(pdf_path).pages.each_with_index do |page, page_num|
      detect_billing_cycle(page) if @year.nil?

      # Only completely empty lines are dropped; lines made of spaces are
      # kept, because column positions matter for the look-ahead below.
      lines = page.text.split("\n").reject(&:empty?)

      lines.each_with_index do |line, index|
        parse_pdf_line page_num, line, lines[index + 1]
      end
    end
  end

  # Walks +page+ looking for the billing cycle header and copies whatever
  # was found (possibly nothing) into the statement.
  def detect_billing_cycle(page)
    walker = BillingCycleWalker.new

    page.walk walker

    @dec_from_prev_year = walker.offset == 1
    @year               = walker.year
    @start_date         = walker.start_date
    @end_date           = walker.end_date
  end

  # Extracts everything of interest from one text line: the summary totals,
  # fee / interest totals and up to one transaction per text column.
  #
  # @param page_num [Integer] index of the page the line came from (unused)
  # @param line [String] the current line
  # @param next_line [String, nil] the next non-empty line, if any
  def parse_pdf_line(page_num, line, next_line)
    parse_summary_totals(line) if @previous_balance.nil?

    record_fee line, FEES_REGEX,     "CAPITAL ONE MEMBER FEE"
    record_fee line, INTEREST_REGEX, "INTEREST CHARGE:PURCHASES"

    parsed = COLUMN_RANGES.map do |range|
      column      = line.to_s[range].to_s
      next_column = next_line.to_s[range].to_s

      parse_transaction(repair_transaction_line(column, next_column))
    end.compact

    # Non-negative amounts are charges; negative amounts are payments.
    charges, credits = parsed.partition { |trx| trx.amount >= 0 }

    @transactions.concat(charges)
    @payments.concat(credits)
  end

  # The first line holding exactly five dollar amounts is taken to be the
  # account summary row.
  def parse_summary_totals(line)
    amounts = line.scan(AMOUNT_REGEX)
    return unless amounts.size == 5

    @previous_balance,
    @total_payments,
    @total_fees,
    @total_transactions,
    @new_balance = amounts.map { |amount| parse_amount(amount) }

    # The sign is flipped so the total can be compared with the sum of the
    # parsed payment entries, whose amounts are negative.
    @total_payments = -@total_payments
  end

  # Appends a fee entry dated at the end of the billing cycle when +line+
  # matches +regex+ with a non-zero amount.
  def record_fee(line, regex, description)
    match = regex.match(line)
    return unless match && match[1] != '$0.00'

    check_billing_cycle

    @fees << Transaction.new(
      @fees.size + 1,
      @end_date,
      description,
      match[1],
      parse_amount(match[1])
    )
  end

  # Re-joins a transaction whose amount was pushed onto the following line:
  # when +line+ holds no amount and +next_line+ holds only an amount, the
  # amount is appended to +line+.
  #
  # @return [String] the repaired (or unchanged) line
  def repair_transaction_line(line, next_line)
    if next_line =~ AMOUNT_ONLY_REGEX && !(line =~ AMOUNT_REGEX)
      "#{line} #{next_line.strip}"
    else
      line
    end
  end

  # Builds a Transaction from a transaction row.
  #
  # @return [Transaction, nil] nil when +line+ is not a transaction row, or
  #   when its description is "CAPITAL ONE MEMBER FEE" (such rows are skipped
  #   here; NOTE(review): presumably because the fee is already recorded from
  #   the "Total Fees This Period" line -- confirm)
  def parse_transaction(line)
    match = TRANSACTION_REGEX.match(line)
    return nil unless match && match[4] != "CAPITAL ONE MEMBER FEE"

    check_billing_cycle

    id, day, month, description, amount_str = match.captures

    # Rows carry no year; December rows of a Dec-Jan cycle belong to the
    # year before the one printed in the billing cycle header.
    year = (month.upcase == 'DEC' && @dec_from_prev_year) ? @year - 1 : @year
    date = Date.parse('%s-%s-%s' % [year, month, day])

    Transaction.new(id.to_i, date, description, amount_str, parse_amount(amount_str))
  end

  # Converts a dollar string to a Float; a leading "(" marks a negative amount.
  def parse_amount(amount)
    num = amount.gsub(/[^\d.]/, '').to_f
    amount.start_with?('(') ? -num : num
  end

  # @return [Numeric] sum of the amounts of +trxs+ (0 for an empty list)
  def sum_amounts(trxs)
    trxs.inject(0) { |sum, trx| sum + trx.amount }
  end

  def check_billing_cycle
    raise "Failed to determine billing cycle dates" if @year.nil?
  end

  # Compares a total printed in the statement with the sum of the parsed
  # entries, both rounded to cents.
  #
  # @raise [RuntimeError] if the expected total was never found in the
  #   statement, or if the two values differ
  def check_total(type, expected, actual)
    # Without this guard a statement lacking the summary row would fail
    # with an unrelated NoMethodError on nil.
    raise "Failed to determine expected %s total" % type if expected.nil?

    return if actual.round(2) == expected.round(2)

    raise "Calculated %s mismatch %.2f != %.2f" % [
      type,
      actual,
      expected
    ]
  end
end
|