parse_capitalone_pdf_statement 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5d442f232d7445dc288b32cd2aa6810d118f0a9d
4
+ data.tar.gz: 77a644f514d32bc3ab777c5733e32a92261a5f73
5
+ SHA512:
6
+ metadata.gz: 241704b7f7219fead5c464e9787ecde2d3af95501147efcf844a770a13013eb785aa3d143d5081b10122fe0f8006ed52b0398954e3d3e221d6a8adb793297036
7
+ data.tar.gz: 23dd29b08081e26b56d29163e5b1f3b56de8708e4e5691848fd845a1938c21082edd0ec98078dea078c0c9c82c1009b4f4bb5c0c169273646703681073a63dd8
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ parse_capitalone_pdf_statement
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-2.1.4
data/.travis.yml ADDED
@@ -0,0 +1,3 @@
1
+ ---
2
+ language: ruby
3
+ script: COVERAGE=true bundle exec rake build
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ --no-private
2
+ -
3
+ LICENSE.txt
4
+ README.md
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in parse_capitalone_pdf_statement.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Joe Sortelli
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # parse_capitalone_pdf_statement
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/parse_capitalone_pdf_statement.svg)](http://badge.fury.io/rb/parse_capitalone_pdf_statement)
4
+ [![Build Status](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement.svg?branch=develop)](https://travis-ci.org/sortelli/parse_capitalone_pdf_statement)
5
+ [![Dependency Status](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement.svg)](https://gemnasium.com/sortelli/parse_capitalone_pdf_statement)
6
+
7
+
8
+ The Capital One website only provides a way to download structured
9
+ data of credit card transaction history for the previous 180 days.
10
+ However, you are able to download monthly PDF account statements
11
+ for the previous few years.
12
+
13
+ This library allows you to parse a Capital One PDF monthly statement,
14
+ and access structured transaction history data.
15
+
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem 'parse_capitalone_pdf_statement'
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ ```bash
27
+ % bundle
28
+ ```
29
+
30
+ Or install it yourself as:
31
+
32
+ ```bash
33
+ % gem install parse_capitalone_pdf_statement
34
+ ```
35
+
36
+ ## Convert PDF to JSON
37
+
38
+ Use the ```capitalone_pdf_to_json.rb``` script to convert a PDF
39
+ monthly statement to JSON.
40
+
41
+ ```bash
42
+ % capitalone_pdf_to_json.rb my_monthly_statement.pdf > my_monthly_statement.json
43
+ ```
44
+
45
+ ## API Example
46
+
47
+ Parse a PDF monthly statement and print all payments:
48
+
49
+ ```ruby
50
+ require 'parse_capitalone_pdf_statement'
51
+
52
+ statement = CapitalOneStatement.new('/path/to/my_monthly_statement.pdf')
53
+
54
+ statement.payments.each do |payment|
55
+ puts 'Transaction ID: %d' % payment.id
56
+ puts 'Date: %s' % payment.date
57
+ puts 'Description: %s' % payment.description
58
+ puts 'Amount: %.2f' % payment.amount
59
+ end
60
+ ```
61
+
62
+ See the [API
63
+ Documentation](http://sortelli.github.io/parse_capitalone_pdf_statement/frames.html#!CapitalOneStatement.html)
64
+ for more information.
65
+
66
+ ## License
67
+
68
+ Copyright (c) 2014 Joe Sortelli
69
+
70
+ MIT License
data/Rakefile ADDED
@@ -0,0 +1,27 @@
1
+ require 'bundler/gem_tasks'
2
+ require 'rake/testtask'
3
+ require 'yard'
4
+
# Defines the documentation task provided by YARD.
YARD::Rake::YardocTask.new

# Defines the `test` task; it runs every test/test_*.rb file.
Rake::TestTask.new do |t|
  t.test_files = FileList['test/test_*.rb']
end

# Building the gem requires the test task to run first.
task :build => :test

desc "Run the tests with COVERAGE=true in the environment"
task :coverage do
  # NOTE(review): presumably the test helper reads COVERAGE to enable
  # coverage reporting -- confirm against test/.
  ENV['COVERAGE'] = 'true'
  Rake::Task['test'].execute
end

desc "Create test pdf files"
task :make_test_pdf do
  # Shell pipeline template: text file -> PostScript (enscript) -> PDF (ps2pdf).
  text_to_pdf = 'enscript -B -f "Times-Roman6.0" %s --output=- | ps2pdf - > %s'
  data_dir    = File.join(File.dirname(__FILE__), 'test', 'data')

  Dir.chdir(data_dir) do
    %w{test_statement bad_statement1}.each do |name|
      # `sh` (unlike `system`) aborts the task when the command exits
      # non-zero, so a missing tool is not silently ignored.
      sh(text_to_pdf % ["#{name}.txt", "#{name}.pdf"])
    end
  end
end
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "parse_capitalone_pdf_statement"
5
+
# Exactly one argument is accepted: the path of the PDF statement to convert.
if ARGV.length != 1
  script_name = File.basename(__FILE__)
  $stderr.puts "usage: #{script_name} statement.pdf"
  exit 1
end

# Parse the statement and print it to stdout as pretty-printed JSON.
statement = CapitalOneStatement.new(ARGV.first)
puts JSON.pretty_generate(statement)
@@ -0,0 +1,261 @@
1
+ require 'parse_capitalone_pdf_statement/version'
2
+ require 'pdf-reader'
3
+ require 'json'
4
+ require 'date'
5
+
6
+ # CapitalOneStatement object, with data parsed from a PDF monthly statement.
7
+ #
8
+ # @!attribute start_date [r]
9
+ # @return [Date] the first day of the monthly statement
10
+ #
11
+ # @!attribute end_date [r]
12
+ # @return [Date] the final day of the monthly statement
13
+ #
14
+ # @!attribute previous_balance [r]
15
+ # @return [Float] the "Previous Balance" field listed in the statement
16
+ #
17
+ # @!attribute total_payments [r]
18
+ # @return [Float] the "Payments and Credits" field listed in the statement
19
+ #
20
+ # @!attribute total_fees [r]
21
+ # @return [Float] the "Fees and Interest Charged" field listed in the statement
22
+ #
23
+ # @!attribute total_transactions [r]
24
+ # @return [Float] the "Transactions" field listed in the statement
25
+ #
26
+ # @!attribute new_balance [r]
27
+ # @return [Float] the New Balance listed in the statement
28
+ #
29
+ # @!attribute payments [r]
30
+ # @return [Array<CapitalOneStatement::Transaction>] array of payment transactions
31
+ #
32
+ # @!attribute transactions [r]
33
+ # @return [Array<CapitalOneStatement::Transaction>] array of charge transactions
34
+ #
35
+ # @!attribute fees [r]
36
+ # @return [Array<CapitalOneStatement::Transaction>] array of fee transactions
37
+
class CapitalOneStatement
  # Billing period text such as "Nov. 05 - Dec. 04, 2014"; captures are
  # start month, start day, end month, end day and year.
  DATE_REGEX        = /(\w{3})\. (\d\d) - (\w{3})\. (\d\d), (\d{4})/
  # A dollar amount, optionally wrapped in parentheses (see #parse_amount,
  # which treats a leading "(" as a negative amount).
  AMOUNT_REGEX      = /\(?\$[\d,]+\.\d\d\)?/
  # A line that holds nothing but an amount.
  AMOUNT_ONLY_REGEX = /^ *#{AMOUNT_REGEX.source} *$/
  FEES_REGEX        = /Total Fees This Period +(#{AMOUNT_REGEX.source})/
  INTEREST_REGEX    = /Total Interest This Period +(#{AMOUNT_REGEX.source})/
  # Transaction row: id, day, three-letter month, description, amount.
  TRANSACTION_REGEX = /^ *(\d+) +(\d\d) ([A-Z][A-Z][A-Z]) (.+[^ ]) +(#{AMOUNT_REGEX.source}) *$/

  # Character ranges of a text line that are each scanned for a transaction.
  # NOTE(review): presumably the left and right halves of a two-column
  # statement layout -- confirm against a real statement.
  COLUMN_RANGES = [(0..78), (80..-1)].freeze

  # strptime format of the "YEAR-MON-DAY" strings built while parsing.
  DATE_FORMAT = '%Y-%b-%d'.freeze

  attr_reader :start_date,
              :end_date,
              :previous_balance,
              :total_payments,
              :total_fees,
              :total_transactions,
              :new_balance,
              :payments,
              :transactions,
              :fees

  # Parses the statement at +pdf_path+ and verifies that the parsed
  # payments, transactions and fees add up to the totals listed in the
  # statement.
  #
  # @param pdf_path [String] path of the PDF monthly statement
  # @raise [RuntimeError] if the billing cycle or the summary totals cannot
  #   be found, or if a calculated total does not match the listed total
  def initialize(pdf_path)
    @dec_from_prev_year = nil
    @year               = nil
    @start_date         = nil
    @end_date           = nil
    @previous_balance   = nil
    @new_balance        = nil
    @total_payments     = nil
    @total_transactions = nil
    @total_fees         = nil
    @payments           = []
    @transactions       = []
    @fees               = []

    parse_from_pdf pdf_path

    @payments     = sort_and_verify('payments',     @total_payments,     @payments)
    @transactions = sort_and_verify('transactions', @total_transactions, @transactions)
    @fees         = sort_and_verify('fees',         @total_fees,         @fees)
  end

  # @return [String] JSON representation of the statement
  def to_json(*args)
    {
      :start_date         => @start_date,
      :end_date           => @end_date,
      :previous_balance   => @previous_balance,
      :total_payments     => @total_payments,
      :total_fees         => @total_fees,
      :total_transactions => @total_transactions,
      :new_balance        => @new_balance,
      :payments           => @payments,
      :transactions       => @transactions,
      :fees               => @fees
    }.to_json(*args)
  end

  private

  # Sorts +trxs+ by transaction id and checks their summed amount against
  # +expected_total+. Returns the sorted array.
  def sort_and_verify(type, expected_total, trxs)
    sorted = trxs.sort_by(&:id)
    check_total(type, expected_total, sorted.inject(0) { |sum, trx| sum + trx.amount })
    sorted
  end

  # Walks every page of the PDF: determines the billing cycle from the first
  # page that yields one, then feeds each text line to #parse_pdf_line.
  def parse_from_pdf(pdf_path)
    PDF::Reader.new(pdf_path).pages.each_with_index do |page, page_num|
      detect_billing_cycle(page) if @year.nil?

      # Blank lines are skipped, so each line is paired with the next
      # non-blank line (nil after the last one).
      lines = page.text.split("\n").reject(&:empty?)

      lines.each_with_index do |line, index|
        parse_pdf_line page_num, line, lines[index + 1]
      end
    end
  end

  # Looks for the billing period on +page+ and records the year and the
  # start/end dates. All of them stay nil when the page has no match.
  def detect_billing_cycle(page)
    walker = BillingCycleWalker.new
    page.walk walker

    @dec_from_prev_year = walker.offset == 1
    @year               = walker.year
    @start_date         = walker.start_date
    @end_date           = walker.end_date
  end

  # Extracts whatever a single text line contributes: the summary totals,
  # period fee/interest charges, and up to one transaction per column range.
  #
  # @param page_num [Integer] zero-based page index (currently unused)
  # @param line [String] the current text line
  # @param next_line [String, nil] the following non-blank line, if any
  def parse_pdf_line(page_num, line, next_line)
    parse_summary_totals(line) if @previous_balance.nil?

    record_period_charge(line, FEES_REGEX,     "CAPITAL ONE MEMBER FEE")
    record_period_charge(line, INTEREST_REGEX, "INTEREST CHARGE:PURCHASES")

    column_texts = COLUMN_RANGES.map do |range|
      repair_transaction_line line.to_s[range].to_s, next_line.to_s[range].to_s
    end

    parsed = column_texts.map { |text| parse_transaction(text) }.compact

    # Non-negative amounts are charges; negative amounts are payments.
    charges, credits = parsed.partition { |trx| trx.amount >= 0 }

    @transactions += charges
    @payments     += credits
  end

  # The first line carrying exactly five amounts is taken to be the account
  # summary: previous balance, payments, fees, transactions, new balance.
  def parse_summary_totals(line)
    amounts = line.scan(AMOUNT_REGEX)
    return unless amounts.size == 5

    @previous_balance,
    @total_payments,
    @total_fees,
    @total_transactions,
    @new_balance = amounts.map { |amount| parse_amount(amount) }

    # Payment transactions carry negative amounts, so the listed total is
    # negated to make the two comparable.
    @total_payments = -@total_payments
  end

  # Appends a fee dated at the end of the billing cycle when +line+ matches
  # +regex+ with a non-zero amount.
  def record_period_charge(line, regex, description)
    match = regex.match(line)
    return if match.nil? || match[1] == '$0.00'

    check_billing_cycle

    @fees << Transaction.new(
      @fees.size + 1,
      @end_date,
      description,
      match[1],
      parse_amount(match[1])
    )
  end

  # When the amount of a transaction wrapped onto the following line, glue
  # it back onto +line+; otherwise +line+ is returned as is.
  def repair_transaction_line(line, next_line)
    if next_line =~ AMOUNT_ONLY_REGEX && !(line =~ AMOUNT_REGEX)
      "#{line} #{next_line.strip}"
    else
      line
    end
  end

  # @return [Transaction, nil] the transaction described by +line+, or nil
  #   when the line is not a transaction row or is the member fee row (the
  #   member fee is recorded from the "Total Fees This Period" line instead)
  def parse_transaction(line)
    match = TRANSACTION_REGEX.match(line)
    return nil if match.nil? || match[4] == "CAPITAL ONE MEMBER FEE"

    check_billing_cycle

    id, day, month, description, amount_str = match.captures

    # In a Dec-Jan statement the December rows belong to the previous year.
    year = (month.upcase == 'DEC' && @dec_from_prev_year) ? @year - 1 : @year
    date = Date.strptime('%s-%s-%s' % [year, month, day], DATE_FORMAT)

    Transaction.new(id.to_i, date, description, amount_str, parse_amount(amount_str))
  end

  # Converts an amount string to a Float; a leading "(" marks a negative
  # amount, e.g. "($1,234.56)" => -1234.56.
  def parse_amount(amount)
    num = amount.gsub(/[^\d.]/, '').to_f
    amount.start_with?('(') ? -num : num
  end

  # @raise [RuntimeError] if no billing period has been found yet
  def check_billing_cycle
    raise "Failed to determine billing cycle dates" if @year.nil?
  end

  # Compares the listed and calculated totals, rounded to cents.
  #
  # @raise [RuntimeError] if the listed total was never found in the
  #   statement, or if the two totals differ
  def check_total(type, expected, actual)
    raise "Failed to find total %s in statement summary" % type if expected.nil?

    return if actual.round(2) == expected.round(2)

    raise "Calculated %s mismatch %.2f != %.2f" % [
      type,
      actual,
      expected
    ]
  end

  # Receiver for PDF::Reader's page walk. It accepts every callback, ignores
  # all but the ones whose name contains "show_text", and remembers the
  # billing period found in their arguments.
  #
  # @api private
  class BillingCycleWalker
    attr_reader :year, :offset, :start_date, :end_date

    # Claim to respond to every callback so each one reaches #method_missing.
    def respond_to_missing?(_name, _include_private = false)
      true
    end

    def method_missing(name, *args)
      return unless name =~ /show_text/

      match = nil
      args.each do |arg|
        match = DATE_REGEX.match(arg.to_s)
        break if match
      end
      return unless match

      start_month, start_day, end_month, end_day, year = match.captures

      # A Dec-Jan cycle starts in the year before the one printed.
      @offset     = (start_month.upcase == 'DEC' && end_month.upcase == 'JAN') ? 1 : 0
      @year       = year.to_i
      @start_date = Date.strptime('%s-%s-%s' % [@year - @offset, start_month, start_day], DATE_FORMAT)
      @end_date   = Date.strptime('%s-%s-%s' % [@year, end_month, end_day], DATE_FORMAT)
    end
  end

  # CapitalOneStatement::Transaction represents a single credit transaction
  class CapitalOneStatement::Transaction < Struct.new(
    :id,
    :date,
    :description,
    :amount_str,
    :amount
  )
    # @!attribute id
    #   @return [Fixnum] transaction id
    #
    # @!attribute date
    #   @return [Date] the date of the transaction
    #
    # @!attribute description
    #   @return [String] the description of the transaction
    #
    # @!attribute amount_str
    #   @return [String] the dollar amount string of the transaction
    #
    # @!attribute amount
    #   @return [Float] the dollar amount parsed into a Float, negative for payments

    # @return [String] JSON representation of Transaction
    def to_json(*args)
      to_h.to_json(*args)
    end
  end
end