hsbc_pdf_statement_parser 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9737c59472032f49b8bc7539d08856feb6e5703a5a43966b8a80340566e8de6
4
- data.tar.gz: 989e26dddd81066f6b2d931067acc47e6b23d60adf35daef7a30bdf27830317e
3
+ metadata.gz: 1aeed5005a826d427a328576db61241bab262e44941ae1c0af5d34f7028f1281
4
+ data.tar.gz: 1af68498d1b4e4732e42a7422a32930170f4870f85a458b0a166b95e0ef7b338
5
5
  SHA512:
6
- metadata.gz: ec9375eeee5b241bfc50fa13a1f39b99587e25fde0c5bde829bcd97d166124727837f788686218b5e46ad3aaaa54dc1707733f6902bd9e400447f93a35eaadf6
7
- data.tar.gz: 3db670821e2d4b6868ace32aefe4688645e4766d4e68a28c3c5d64e320fa7d68764799101c050ab1b9b1c2ca2bab907db6c9e6cc950ae3789edc09fbfb0cff65
6
+ metadata.gz: 64d6adeb4671eff35241d94d8f848fe54cbf6c324aa366e75d658d44b5113b468badde10b262ab092ab8b43e87402dad81cc10f9ba4b281c6fefc325c4533199
7
+ data.tar.gz: bebec0676834fd0da389f2094017a930c795f103a7477e67062449d31c3598cd9571f5f6dcbb57ab06549e4b761515915e9f211d936bf1e7660452c2497a32db
@@ -1,161 +1,165 @@
1
- class HsbcPdfStatementParser::Parser
1
+ module HsbcPdfStatementParser
2
2
 
3
- # Creates a new parser from a PDF file.
4
- #
5
- # === Parameters
6
- #
7
- # [filename] the filename to parse
8
- def initialize( filename )
9
-
10
- @reader = PDF::Reader.new( filename )
11
-
12
- end
13
-
14
- # Returns an array of the transactions in the document as hashes.
15
- #
16
- # === Hash keys
17
- #
18
- # [:date] the date of the transaction _(Date)_
19
- # [:type] the type of the transaction, eg ‘VISA’, ‘DD’, ‘ATM’, etc _(String)_
20
- # [:details] the details of the transaction. This can span multiple lines _(String)_
21
- # [:out] the amount of the transaction, if a debit _(Float, nil)_
22
- # [:in] the amount of the transaction, if a credit _(Float, nil)_
23
- # [:change] the amount of the transaction: negative if a debit, positive if a credit _(Float)_
24
- def transactions
25
-
26
- @_transactions ||= begin
3
+ class Parser
4
+
5
+ # Creates a new parser from a PDF file.
6
+ #
7
+ # === Parameters
8
+ #
9
+ # [filename] the filename to parse
10
+ def initialize( filename )
27
11
 
28
- current_transaction = nil
29
- current_date = nil
30
- transactions = []
12
+ @reader = PDF::Reader.new( filename )
31
13
 
32
- document_text
33
- .scan( /BALANCE\s?BROUGHT\s?FORWARD(?:.*?)\n(.*?)BALANCE\s?CARRIED\s?FORWARD/im )
34
- .map{ |text| parse_page( text[0] )}
35
- .flatten
36
- .each do |line|
14
+ end
15
+
16
+ # Returns an array of the transactions in the document as hashes.
17
+ #
18
+ # === Hash keys
19
+ #
20
+ # [:date] the date of the transaction _(Date)_
21
+ # [:type] the type of the transaction, eg ‘VISA’, ‘DD’, ‘ATM’, etc _(String)_
22
+ # [:details] the details of the transaction. This can span multiple lines _(String)_
23
+ # [:out] the amount of the transaction, if a debit _(Float, nil)_
24
+ # [:in] the amount of the transaction, if a credit _(Float, nil)_
25
+ # [:change] the amount of the transaction: negative if a debit, positive if a credit _(Float)_
26
+ def transactions
27
+
28
+ @_transactions ||= begin
37
29
 
38
- # store the current date
39
- current_date = line[:date] unless line[:date].nil?
30
+ current_transaction = nil
31
+ current_date = nil
32
+ transactions = []
40
33
 
41
- # if we have a type, start a new transaction
42
- unless line[:type].nil?
43
- transactions << current_transaction unless current_transaction.nil?
44
- current_transaction = line.merge( date: current_date )
45
- next
34
+ document_text
35
+ .scan( /BALANCE\s?BROUGHT\s?FORWARD(?:.*?)\n(.*?)BALANCE\s?CARRIED\s?FORWARD/im )
36
+ .map{ |text| parse_page( text[0] )}
37
+ .flatten
38
+ .each do |line|
39
+
40
+ # store the current date
41
+ current_date = line[:date] unless line[:date].nil?
42
+
43
+ # if we have a type, start a new transaction
44
+ unless line[:type].nil?
45
+ transactions << current_transaction unless current_transaction.nil?
46
+ current_transaction = line.merge( date: current_date )
47
+ next
48
+ end
49
+
50
+ # merge things in
51
+ current_transaction.merge!( line.select{ |k,v| v }, { details: "#{current_transaction[:details]}\n#{line[:details]}" })
52
+
46
53
  end
47
54
 
48
- # merge things in
49
- current_transaction.merge!( line.select{ |k,v| v }, { details: "#{current_transaction[:details]}\n#{line[:details]}" })
50
-
51
- end
55
+ # dump the final transaction + return
56
+ transactions << current_transaction unless current_transaction.nil?
57
+ transactions
58
+ end
52
59
 
53
- # dump the final transaction + return
54
- transactions << current_transaction unless current_transaction.nil?
55
- transactions
56
60
  end
57
61
 
58
- end
59
-
60
- # Returns the opening balance of the statement read from the table on the first page.
61
- def opening_balance
62
-
63
- @_opening_balance ||= scan_figure( 'Opening Balance' )
64
-
65
- end
66
-
67
- # Returns the closing balance of the statement read from the table on the first page.
68
- def closing_balance
69
-
70
- @_closing_balance ||= scan_figure( 'Closing Balance' )
71
-
72
- end
73
-
74
- # Returns the total value of payments in during the statement read from the table on the first page (ie: not calculated)
75
- def payments_in
76
-
77
- @_payments_in ||= scan_figure( 'Payments In' )
78
-
79
- end
80
-
81
- # Returns the total value of payments out during the statement read from the table on the first page (ie: not calculated)
82
- def payments_out
83
-
84
- @_payments_out ||= scan_figure( 'Payments Out' )
85
-
86
- end
87
-
88
- private
89
-
90
- def document_text
91
-
92
- @text ||= begin
62
+ # Returns the opening balance of the statement read from the table on the first page.
63
+ def opening_balance
93
64
 
94
- @reader.pages.map( &:text ).join
65
+ @_opening_balance ||= scan_figure( 'Opening Balance' )
95
66
 
96
67
  end
97
68
 
98
- end
99
-
100
- def scan_figure( search_string )
101
-
102
- @_first_page ||= @reader.pages.first.text
103
-
104
- match = Regexp.new( "#{search_string}(?:.*?)([0-9\.\,]{4,})", Regexp::IGNORECASE ).match( @_first_page )
105
- return nil if match.nil?
106
-
107
- match[1].gsub( ',', '' ).to_f
108
-
109
- end
110
-
111
- def parse_page( page_str )
69
+ # Returns the closing balance of the statement read from the table on the first page.
70
+ def closing_balance
71
+
72
+ @_closing_balance ||= scan_figure( 'Closing Balance' )
73
+
74
+ end
112
75
 
113
- # grab lines + get the longest
114
- lines = page_str.lines
115
- max_length = lines.map( &:length ).max
76
+ # Returns the total value of payments in during the statement read from the table on the first page (ie: not calculated)
77
+ def payments_in
78
+
79
+ @_payments_in ||= scan_figure( 'Payments In' )
80
+
81
+ end
116
82
 
117
- lines.map{ |line| parse_line( line.rstrip.ljust( max_length ))}.compact
83
+ # Returns the total value of payments out during the statement read from the table on the first page (ie: not calculated)
84
+ def payments_out
118
85
 
119
- end
120
-
121
- def parse_line( line_str )
86
+ @_payments_out ||= scan_figure( 'Payments Out' )
87
+
88
+ end
122
89
 
123
- # if we’re blank…
124
- return nil if line_str.strip.empty?
90
+ private
125
91
 
126
- # start cutting things up
127
- row = {
128
- date: empty_to_nil( line_str[0..12] ),
129
- type: empty_to_nil( line_str[12..20] ),
130
- details: empty_to_nil( line_str[20..70] ),
131
- out: empty_to_nil( line_str[70..90] ),
132
- in: empty_to_nil( line_str[90..110] ),
133
- balance: empty_to_nil( line_str[110..130] )
134
- }
92
+ def document_text
93
+
94
+ @text ||= begin
95
+
96
+ @reader.pages.map( &:text ).join
97
+
98
+ end
99
+
100
+ end
135
101
 
136
- # munge things further
137
- row[:date] = Date.strptime( row[:date], '%d %b %y' ) unless row[:date].nil?
138
- row[:out] = row[:out].gsub( ',', '' ).to_f unless row[:out].nil?
139
- row[:in] = row[:in].gsub( ',', '' ).to_f unless row[:in].nil?
140
- row[:balance] = row[:balance].gsub( ',', '' ).to_f unless row[:balance].nil?
102
+ def scan_figure( search_string )
103
+
104
+ @_first_page ||= @reader.pages.first.text
105
+
106
+ match = Regexp.new( "#{search_string}(?:.*?)([0-9\.\,]{4,})", Regexp::IGNORECASE ).match( @_first_page )
107
+ return nil if match.nil?
108
+
109
+ match[1].gsub( ',', '' ).to_f
110
+
111
+ end
141
112
 
142
- # set a change amount
143
- row[:change] = if !row[:out].nil?
144
- 0 - row[:out]
145
- elsif !row[:in].nil?
146
- row[:in]
147
- else
148
- nil
113
+ def parse_page( page_str )
114
+
115
+ # grab lines + get the longest
116
+ lines = page_str.lines
117
+ max_length = lines.map( &:length ).max
118
+
119
+ lines.map{ |line| parse_line( line.rstrip.ljust( max_length ))}.compact
120
+
149
121
  end
150
122
 
151
- # return
152
- row
123
+ def parse_line( line_str )
153
124
 
154
- end
155
-
156
- def empty_to_nil( str )
125
+ # if we’re blank…
126
+ return nil if line_str.strip.empty?
127
+
128
+ # start cutting things up
129
+ row = {
130
+ date: empty_to_nil( line_str[0..12] ),
131
+ type: empty_to_nil( line_str[12..20] ),
132
+ details: empty_to_nil( line_str[20..70] ),
133
+ out: empty_to_nil( line_str[70..90] ),
134
+ in: empty_to_nil( line_str[90..110] ),
135
+ balance: empty_to_nil( line_str[110..130] )
136
+ }
137
+
138
+ # munge things further
139
+ row[:date] = Date.strptime( row[:date], '%d %b %y' ) unless row[:date].nil?
140
+ row[:out] = row[:out].gsub( ',', '' ).to_f unless row[:out].nil?
141
+ row[:in] = row[:in].gsub( ',', '' ).to_f unless row[:in].nil?
142
+ row[:balance] = row[:balance].gsub( ',', '' ).to_f unless row[:balance].nil?
143
+
144
+ # set a change amount
145
+ row[:change] = if !row[:out].nil?
146
+ 0 - row[:out]
147
+ elsif !row[:in].nil?
148
+ row[:in]
149
+ else
150
+ nil
151
+ end
152
+
153
+ # return
154
+ row
155
+
156
+ end
157
157
 
158
- ( str.strip!.empty? ) ? nil : str
158
+ def empty_to_nil( str )
159
+
160
+ ( str.strip!.empty? ) ? nil : str
161
+
162
+ end
159
163
 
160
164
  end
161
165
 
@@ -1,3 +1,3 @@
1
1
  module HsbcPdfStatementParser
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hsbc_pdf_statement_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Pearse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-24 00:00:00.000000000 Z
11
+ date: 2019-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader