bank_statement_parser 0.0.10 → 0.1.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/bank_statement_to_text.sh +22 -0
- data/bin/bank_statement_to_yaml.rb +3 -20
- data/lib/bank_statement_parser/bank_statement.rb +48 -0
- data/lib/bank_statement_parser/base.rb +55 -8
- data/lib/bank_statement_parser/hsbc.rb +13 -12
- data/lib/bank_statement_parser/statement_record.rb +3 -6
- data/lib/bank_statement_parser.rb +0 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 28cc31e88b6633ad90a259383e0fb6b4df8954f5
|
4
|
+
data.tar.gz: 703b94437f058e944a1ec40bc0f301b084518ff9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccfcb6d0cdb6a92691d3ea9a257a88020e835b1c1ce272de80c302873ccccdc13bdf612498f2dc5f23918d84660be4c24f144c3f7d424595f9418233bb165ae6
|
7
|
+
data.tar.gz: 84378266b04c539eda026af368e81541b81f582b77b4aec32b356e3cbac80c845720941abe58109b560af97272277d15cee0fd3bc7bfed537040f3bcb0b07519
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/bin/sh -
|
2
|
+
#
|
3
|
+
# Convert the specified bank statement PDF file to text
|
4
|
+
|
5
|
+
# Copyright 2015 Simon Dawson <spdawson@gmail.com>
|
6
|
+
|
7
|
+
# This file is part of bank_statement_parser.
|
8
|
+
#
|
9
|
+
# bank_statement_parser is free software: you can redistribute it and/or modify
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
11
|
+
# the Free Software Foundation, either version 3 of the License, or
|
12
|
+
# (at your option) any later version.
|
13
|
+
#
|
14
|
+
# bank_statement_parser is distributed in the hope that it will be useful,
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
+
# GNU General Public License for more details.
|
18
|
+
#
|
19
|
+
# You should have received a copy of the GNU General Public License
|
20
|
+
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
21
|
+
|
22
|
+
exec pdftotext -layout -eol unix -enc UTF-8 "$1"
|
@@ -19,6 +19,8 @@
|
|
19
19
|
# You should have received a copy of the GNU General Public License
|
20
20
|
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
21
21
|
|
22
|
+
require 'yaml'
|
23
|
+
|
22
24
|
require 'bank_statement_parser'
|
23
25
|
|
24
26
|
parser = BankStatementParser::HSBC.new
|
@@ -26,23 +28,4 @@ parser = BankStatementParser::HSBC.new
|
|
26
28
|
# Attempt to parse the specified file
|
27
29
|
parser.parse ARGV[0]
|
28
30
|
|
29
|
-
|
30
|
-
puts <<METADATA
|
31
|
-
bank_statement:
|
32
|
-
account_number: #{parser.account_number}
|
33
|
-
sort_code: #{parser.sort_code}
|
34
|
-
statement_date: #{parser.statement_date}
|
35
|
-
records:
|
36
|
-
METADATA
|
37
|
-
|
38
|
-
# Statement records
|
39
|
-
parser.records.each do |record|
|
40
|
-
puts <<RECORD
|
41
|
-
- date: #{record.date}
|
42
|
-
type: #{record.type}
|
43
|
-
credit: #{record.credit}
|
44
|
-
amount: #{record.amount || ''}
|
45
|
-
detail: #{record.detail}
|
46
|
-
balance: #{record.balance || ''}
|
47
|
-
RECORD
|
48
|
-
end
|
31
|
+
puts YAML.dump(parser.bank_statement)
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright 2015 Simon Dawson <spdawson@gmail.com>
|
2
|
+
|
3
|
+
# This file is part of bank_statement_parser.
|
4
|
+
#
|
5
|
+
# bank_statement_parser is free software: you can redistribute it and/or modify
|
6
|
+
# it under the terms of the GNU General Public License as published by
|
7
|
+
# the Free Software Foundation, either version 3 of the License, or
|
8
|
+
# (at your option) any later version.
|
9
|
+
#
|
10
|
+
# bank_statement_parser is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License
|
16
|
+
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
require 'yaml'
|
19
|
+
|
20
|
+
module BankStatementParser
|
21
|
+
|
22
|
+
# A bank statement
|
23
|
+
class BankStatement
|
24
|
+
attr_accessor :sort_code, :account_number, :statement_date,
|
25
|
+
:opening_balance, :closing_balance, :records
|
26
|
+
|
27
|
+
# Constructor
|
28
|
+
def initialize
|
29
|
+
@records = []
|
30
|
+
end
|
31
|
+
|
32
|
+
# Stringify
|
33
|
+
def to_s
|
34
|
+
to_yaml
|
35
|
+
end
|
36
|
+
|
37
|
+
# Equality test
|
38
|
+
def ==(other)
|
39
|
+
super || (sort_code == other.sort_code &&
|
40
|
+
account_number == other.account_number &&
|
41
|
+
statement_date == other.statement_date &&
|
42
|
+
opening_balance == other.opening_balance &&
|
43
|
+
closing_balance == other.closing_balance &&
|
44
|
+
records == other.records)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
end
|
@@ -15,6 +15,8 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
+
require 'bank_statement_parser/bank_statement'
|
19
|
+
|
18
20
|
module BankStatementParser
|
19
21
|
|
20
22
|
# Base class for statement parsers
|
@@ -31,7 +33,7 @@ module BankStatementParser
|
|
31
33
|
|
32
34
|
require 'fileutils.rb'
|
33
35
|
|
34
|
-
attr_accessor :
|
36
|
+
attr_accessor :bank_statement
|
35
37
|
|
36
38
|
# Constructor
|
37
39
|
def initialize
|
@@ -53,9 +55,9 @@ module BankStatementParser
|
|
53
55
|
end
|
54
56
|
|
55
57
|
# Sanity checking
|
56
|
-
raise "Failed to find sort code" if
|
57
|
-
raise "Failed to find account number" if
|
58
|
-
raise "Failed to find statement date" if
|
58
|
+
raise "Failed to find sort code" if sort_code.nil?
|
59
|
+
raise "Failed to find account number" if account_number.nil?
|
60
|
+
raise "Failed to find statement date" if statement_date.nil?
|
59
61
|
end
|
60
62
|
|
61
63
|
protected
|
@@ -67,10 +69,55 @@ module BankStatementParser
|
|
67
69
|
|
68
70
|
# Reset the parser
|
69
71
|
def reset
|
70
|
-
@
|
71
|
-
|
72
|
-
|
73
|
-
|
72
|
+
@bank_statement = BankStatement.new
|
73
|
+
end
|
74
|
+
|
75
|
+
# @todo FIXME: Why can't we use Forwardable for these methods?
|
76
|
+
#
|
77
|
+
# Partially works, but doesn't seem to be accessible from subclasses...
|
78
|
+
|
79
|
+
def sort_code
|
80
|
+
@bank_statement.sort_code
|
81
|
+
end
|
82
|
+
|
83
|
+
def sort_code= sort_code
|
84
|
+
@bank_statement.sort_code = sort_code
|
85
|
+
end
|
86
|
+
|
87
|
+
def account_number
|
88
|
+
@bank_statement.account_number
|
89
|
+
end
|
90
|
+
|
91
|
+
def account_number= account_number
|
92
|
+
@bank_statement.account_number = account_number
|
93
|
+
end
|
94
|
+
|
95
|
+
def statement_date
|
96
|
+
@bank_statement.statement_date
|
97
|
+
end
|
98
|
+
|
99
|
+
def statement_date= statement_date
|
100
|
+
@bank_statement.statement_date = statement_date
|
101
|
+
end
|
102
|
+
|
103
|
+
def opening_balance
|
104
|
+
@bank_statement.opening_balance
|
105
|
+
end
|
106
|
+
|
107
|
+
def opening_balance= opening_balance
|
108
|
+
@bank_statement.opening_balance = opening_balance
|
109
|
+
end
|
110
|
+
|
111
|
+
def closing_balance
|
112
|
+
@bank_statement.closing_balance
|
113
|
+
end
|
114
|
+
|
115
|
+
def closing_balance= closing_balance
|
116
|
+
@bank_statement.closing_balance = closing_balance
|
117
|
+
end
|
118
|
+
|
119
|
+
def add_record record
|
120
|
+
@bank_statement.records << record
|
74
121
|
end
|
75
122
|
|
76
123
|
end
|
@@ -17,6 +17,7 @@
|
|
17
17
|
|
18
18
|
require 'date'
|
19
19
|
require 'bank_statement_parser/base'
|
20
|
+
require 'bank_statement_parser/statement_record'
|
20
21
|
require 'bank_statement_parser/utils'
|
21
22
|
module BankStatementParser
|
22
23
|
|
@@ -49,23 +50,23 @@ module BankStatementParser
|
|
49
50
|
|
50
51
|
# Look for sort code and account number lines, if we haven't found
|
51
52
|
# one yet
|
52
|
-
if
|
53
|
+
if sort_code.nil? && account_number.nil?
|
53
54
|
if line =~ /(?:\A[A-Z][\w\s]+|,)\s+(?<sort_code>\d{2}-\d{2}-\d{2})\s+(?<account_number>\d{8})(?:\s*|\s+\d+)\z/
|
54
55
|
logger.debug { "Found sort code and account number" }
|
55
|
-
|
56
|
-
|
56
|
+
self.sort_code = Regexp.last_match(:sort_code)
|
57
|
+
self.account_number = Regexp.last_match(:account_number)
|
57
58
|
end
|
58
59
|
end
|
59
60
|
|
60
61
|
# Look for statement date lines, if we haven't found one yet
|
61
|
-
if
|
62
|
+
if statement_date.nil?
|
62
63
|
if line =~ /\A\s*(?<statement_date>\d{2} (?:#{MONTHS.map{|m| m[0,3]}.join('|')}) \d{4})\s*\z/
|
63
64
|
logger.debug { "Found statement date (1st form)" }
|
64
65
|
@statement_format = StatementFormat::FORMAT_1ST
|
65
66
|
|
66
67
|
# Parse statement date
|
67
68
|
date_string = Regexp.last_match(:statement_date)
|
68
|
-
|
69
|
+
self.statement_date = Date.parse(date_string)
|
69
70
|
elsif line =~ /\A(?<date_range_start>\d+\s+(?:#{MONTHS.join('|')})(?:\s+\d{4})?)\s+to\s+(?<date_range_end>\d+\s+(?:#{MONTHS.join('|')})\s+\d{4})\b/
|
70
71
|
logger.debug { "Found statement date (2nd form)" }
|
71
72
|
@statement_format = StatementFormat::FORMAT_2ND
|
@@ -75,11 +76,11 @@ module BankStatementParser
|
|
75
76
|
logger.debug { "Found statement date range #{date_range_start}-#{date_range_end}" }
|
76
77
|
|
77
78
|
# Parse range end date
|
78
|
-
|
79
|
+
self.statement_date = Date.parse(date_range_end)
|
79
80
|
end
|
80
81
|
end
|
81
82
|
|
82
|
-
if
|
83
|
+
if !sort_code.nil? && !account_number.nil? && !statement_date.nil?
|
83
84
|
|
84
85
|
# Look for statement records proper
|
85
86
|
headings = nil
|
@@ -161,14 +162,14 @@ module BankStatementParser
|
|
161
162
|
# The date we have parsed will have the year set to the current year.
|
162
163
|
#
|
163
164
|
# We need to figure out the correct year, from the statement date.
|
164
|
-
raise "No statement date" unless
|
165
|
-
record_date = Date.new(
|
165
|
+
raise "No statement date" unless statement_date
|
166
|
+
record_date = Date.new(statement_date.year,
|
166
167
|
record_date.month,
|
167
168
|
record_date.day)
|
168
169
|
logger.debug { "record date #{record_date}" }
|
169
|
-
if
|
170
|
+
if statement_date.month != record_date.month
|
170
171
|
logger.debug { "record month differs from statement month" }
|
171
|
-
if 1 ==
|
172
|
+
if 1 == statement_date.month
|
172
173
|
# Assume that the statement crosses a year boundary: the record
|
173
174
|
# must be from the end of the previous year
|
174
175
|
raise "Expected a record from December" unless
|
@@ -371,7 +372,7 @@ module BankStatementParser
|
|
371
372
|
detail: full_details,
|
372
373
|
balance: balance)
|
373
374
|
logger.debug { "Created statement record: #{record}" }
|
374
|
-
|
375
|
+
add_record record
|
375
376
|
|
376
377
|
@cached_payment_type = nil
|
377
378
|
@cached_details = []
|
@@ -15,6 +15,8 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License
|
16
16
|
# along with bank_statement_parser. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
|
18
|
+
require 'yaml'
|
19
|
+
|
18
20
|
module BankStatementParser
|
19
21
|
|
20
22
|
# A bank statement record
|
@@ -35,12 +37,7 @@ module BankStatementParser
|
|
35
37
|
|
36
38
|
# Stringify
|
37
39
|
def to_s
|
38
|
-
|
39
|
-
type,
|
40
|
-
credit.to_s,
|
41
|
-
(amount || ''),
|
42
|
-
detail,
|
43
|
-
(balance || '')]
|
40
|
+
to_yaml
|
44
41
|
end
|
45
42
|
|
46
43
|
# Equality test
|
metadata
CHANGED
@@ -1,24 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bank_statement_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.10
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Dawson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: A gem for parsing bank statements
|
14
14
|
email: spdawson@gmail.com
|
15
15
|
executables:
|
16
16
|
- bank_statement_to_yaml.rb
|
17
|
+
- bank_statement_to_text.sh
|
17
18
|
extensions: []
|
18
19
|
extra_rdoc_files: []
|
19
20
|
files:
|
21
|
+
- bin/bank_statement_to_text.sh
|
20
22
|
- bin/bank_statement_to_yaml.rb
|
21
23
|
- lib/bank_statement_parser.rb
|
24
|
+
- lib/bank_statement_parser/bank_statement.rb
|
22
25
|
- lib/bank_statement_parser/base.rb
|
23
26
|
- lib/bank_statement_parser/hsbc.rb
|
24
27
|
- lib/bank_statement_parser/statement_record.rb
|