hsbc_pdf_statement_parser 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d9737c59472032f49b8bc7539d08856feb6e5703a5a43966b8a80340566e8de6
4
- data.tar.gz: 989e26dddd81066f6b2d931067acc47e6b23d60adf35daef7a30bdf27830317e
3
+ metadata.gz: 1aeed5005a826d427a328576db61241bab262e44941ae1c0af5d34f7028f1281
4
+ data.tar.gz: 1af68498d1b4e4732e42a7422a32930170f4870f85a458b0a166b95e0ef7b338
5
5
  SHA512:
6
- metadata.gz: ec9375eeee5b241bfc50fa13a1f39b99587e25fde0c5bde829bcd97d166124727837f788686218b5e46ad3aaaa54dc1707733f6902bd9e400447f93a35eaadf6
7
- data.tar.gz: 3db670821e2d4b6868ace32aefe4688645e4766d4e68a28c3c5d64e320fa7d68764799101c050ab1b9b1c2ca2bab907db6c9e6cc950ae3789edc09fbfb0cff65
6
+ metadata.gz: 64d6adeb4671eff35241d94d8f848fe54cbf6c324aa366e75d658d44b5113b468badde10b262ab092ab8b43e87402dad81cc10f9ba4b281c6fefc325c4533199
7
+ data.tar.gz: bebec0676834fd0da389f2094017a930c795f103a7477e67062449d31c3598cd9571f5f6dcbb57ab06549e4b761515915e9f211d936bf1e7660452c2497a32db
@@ -1,161 +1,165 @@
1
- class HsbcPdfStatementParser::Parser
1
+ module HsbcPdfStatementParser
2
2
 
3
- # Creates a new parser from a PDF file.
4
- #
5
- # === Parameters
6
- #
7
- # [filename] the filename to parse
8
- def initialize( filename )
9
-
10
- @reader = PDF::Reader.new( filename )
11
-
12
- end
13
-
14
- # Returns an array of the transactions in the document as hashes.
15
- #
16
- # === Hash keys
17
- #
18
- # [:date] the date of the transaction _(Date)_
19
- # [:type] the type of the transaction, eg ‘VISA’, ‘DD’, ‘ATM’, etc _(String)_
20
- # [:details] the details of the transaction. This can span multiple lines _(String)_
21
- # [:out] the amount of the transaction, if a debit _(Float, nil)_
22
- # [:in] the amount of the transaction, if a credit _(Float, nil)_
23
- # [:change] the amount of the transacation: negative if a debit, positive if a credit _(Float)_
24
- def transactions
25
-
26
- @_transactions ||= begin
3
+ class Parser
4
+
5
+ # Creates a new parser from a PDF file.
6
+ #
7
+ # === Parameters
8
+ #
9
+ # [filename] the filename to parse
10
+ def initialize( filename )
27
11
 
28
- current_transaction = nil
29
- current_date = nil
30
- transactions = []
12
+ @reader = PDF::Reader.new( filename )
31
13
 
32
- document_text
33
- .scan( /BALANCE\s?BROUGHT\s?FORWARD(?:.*?)\n(.*?)BALANCE\s?CARRIED\s?FORWARD/im )
34
- .map{ |text| parse_page( text[0] )}
35
- .flatten
36
- .each do |line|
14
+ end
15
+
16
+ # Returns an array of the transactions in the document as hashes.
17
+ #
18
+ # === Hash keys
19
+ #
20
+ # [:date] the date of the transaction _(Date)_
21
+ # [:type] the type of the transaction, eg ‘VISA’, ‘DD’, ‘ATM’, etc _(String)_
22
+ # [:details] the details of the transaction. This can span multiple lines _(String)_
23
+ # [:out] the amount of the transaction, if a debit _(Float, nil)_
24
+ # [:in] the amount of the transaction, if a credit _(Float, nil)_
25
+ # [:change] the amount of the transacation: negative if a debit, positive if a credit _(Float)_
26
+ def transactions
27
+
28
+ @_transactions ||= begin
37
29
 
38
- # store the current date
39
- current_date = line[:date] unless line[:date].nil?
30
+ current_transaction = nil
31
+ current_date = nil
32
+ transactions = []
40
33
 
41
- # if we have a type, start a new transaction
42
- unless line[:type].nil?
43
- transactions << current_transaction unless current_transaction.nil?
44
- current_transaction = line.merge( date: current_date )
45
- next
34
+ document_text
35
+ .scan( /BALANCE\s?BROUGHT\s?FORWARD(?:.*?)\n(.*?)BALANCE\s?CARRIED\s?FORWARD/im )
36
+ .map{ |text| parse_page( text[0] )}
37
+ .flatten
38
+ .each do |line|
39
+
40
+ # store the current date
41
+ current_date = line[:date] unless line[:date].nil?
42
+
43
+ # if we have a type, start a new transaction
44
+ unless line[:type].nil?
45
+ transactions << current_transaction unless current_transaction.nil?
46
+ current_transaction = line.merge( date: current_date )
47
+ next
48
+ end
49
+
50
+ # merge things in
51
+ current_transaction.merge!( line.select{ |k,v| v }, { details: "#{current_transaction[:details]}\n#{line[:details]}" })
52
+
46
53
  end
47
54
 
48
- # merge things in
49
- current_transaction.merge!( line.select{ |k,v| v }, { details: "#{current_transaction[:details]}\n#{line[:details]}" })
50
-
51
- end
55
+ # dump the final transaction + return
56
+ transactions << current_transaction unless current_transaction.nil?
57
+ transactions
58
+ end
52
59
 
53
- # dump the final transaction + return
54
- transactions << current_transaction unless current_transaction.nil?
55
- transactions
56
60
  end
57
61
 
58
- end
59
-
60
- # Returns the opening balance of the statement read from the table on the first page.
61
- def opening_balance
62
-
63
- @_opening_balance ||= scan_figure( 'Opening Balance' )
64
-
65
- end
66
-
67
- # Returns the closing balance of the statement read from the table on the first page.
68
- def closing_balance
69
-
70
- @_closing_balance ||= scan_figure( 'Closing Balance' )
71
-
72
- end
73
-
74
- # Returns the total value of payments in during the statement read from the table on the first page (ie: not calculated)
75
- def payments_in
76
-
77
- @_payments_in ||= scan_figure( 'Payments In' )
78
-
79
- end
80
-
81
- # Returns the total value of payments out during the statement read from the table on the first page (ie: not calculated)
82
- def payments_out
83
-
84
- @_payments_out ||= scan_figure( 'Payments Out' )
85
-
86
- end
87
-
88
- private
89
-
90
- def document_text
91
-
92
- @text ||= begin
62
+ # Returns the opening balance of the statement read from the table on the first page.
63
+ def opening_balance
93
64
 
94
- @reader.pages.map( &:text ).join
65
+ @_opening_balance ||= scan_figure( 'Opening Balance' )
95
66
 
96
67
  end
97
68
 
98
- end
99
-
100
- def scan_figure( search_string )
101
-
102
- @_first_page ||= @reader.pages.first.text
103
-
104
- match = Regexp.new( "#{search_string}(?:.*?)([0-9\.\,]{4,})", Regexp::IGNORECASE ).match( @_first_page )
105
- return nil if match.nil?
106
-
107
- match[1].gsub( ',', '' ).to_f
108
-
109
- end
110
-
111
- def parse_page( page_str )
69
+ # Returns the closing balance of the statement read from the table on the first page.
70
+ def closing_balance
71
+
72
+ @_closing_balance ||= scan_figure( 'Closing Balance' )
73
+
74
+ end
112
75
 
113
- # grab lines + get the longest
114
- lines = page_str.lines
115
- max_length = lines.map( &:length ).max
76
+ # Returns the total value of payments in during the statement read from the table on the first page (ie: not calculated)
77
+ def payments_in
78
+
79
+ @_payments_in ||= scan_figure( 'Payments In' )
80
+
81
+ end
116
82
 
117
- lines.map{ |line| parse_line( line.rstrip.ljust( max_length ))}.compact
83
+ # Returns the total value of payments out during the statement read from the table on the first page (ie: not calculated)
84
+ def payments_out
118
85
 
119
- end
120
-
121
- def parse_line( line_str )
86
+ @_payments_out ||= scan_figure( 'Payments Out' )
87
+
88
+ end
122
89
 
123
- # if we’re blank…
124
- return nil if line_str.strip.empty?
90
+ private
125
91
 
126
- # start cutting things up
127
- row = {
128
- date: empty_to_nil( line_str[0..12] ),
129
- type: empty_to_nil( line_str[12..20] ),
130
- details: empty_to_nil( line_str[20..70] ),
131
- out: empty_to_nil( line_str[70..90] ),
132
- in: empty_to_nil( line_str[90..110] ),
133
- balance: empty_to_nil( line_str[110..130] )
134
- }
92
+ def document_text
93
+
94
+ @text ||= begin
95
+
96
+ @reader.pages.map( &:text ).join
97
+
98
+ end
99
+
100
+ end
135
101
 
136
- # munge things further
137
- row[:date] = Date.strptime( row[:date], '%d %b %y' ) unless row[:date].nil?
138
- row[:out] = row[:out].gsub( ',', '' ).to_f unless row[:out].nil?
139
- row[:in] = row[:in].gsub( ',', '' ).to_f unless row[:in].nil?
140
- row[:balance] = row[:balance].gsub( ',', '' ).to_f unless row[:balance].nil?
102
+ def scan_figure( search_string )
103
+
104
+ @_first_page ||= @reader.pages.first.text
105
+
106
+ match = Regexp.new( "#{search_string}(?:.*?)([0-9\.\,]{4,})", Regexp::IGNORECASE ).match( @_first_page )
107
+ return nil if match.nil?
108
+
109
+ match[1].gsub( ',', '' ).to_f
110
+
111
+ end
141
112
 
142
- # set a change amount
143
- row[:change] = if !row[:out].nil?
144
- 0 - row[:out]
145
- elsif !row[:in].nil?
146
- row[:in]
147
- else
148
- nil
113
+ def parse_page( page_str )
114
+
115
+ # grab lines + get the longest
116
+ lines = page_str.lines
117
+ max_length = lines.map( &:length ).max
118
+
119
+ lines.map{ |line| parse_line( line.rstrip.ljust( max_length ))}.compact
120
+
149
121
  end
150
122
 
151
- # return
152
- row
123
+ def parse_line( line_str )
153
124
 
154
- end
155
-
156
- def empty_to_nil( str )
125
+ # if we’re blank…
126
+ return nil if line_str.strip.empty?
127
+
128
+ # start cutting things up
129
+ row = {
130
+ date: empty_to_nil( line_str[0..12] ),
131
+ type: empty_to_nil( line_str[12..20] ),
132
+ details: empty_to_nil( line_str[20..70] ),
133
+ out: empty_to_nil( line_str[70..90] ),
134
+ in: empty_to_nil( line_str[90..110] ),
135
+ balance: empty_to_nil( line_str[110..130] )
136
+ }
137
+
138
+ # munge things further
139
+ row[:date] = Date.strptime( row[:date], '%d %b %y' ) unless row[:date].nil?
140
+ row[:out] = row[:out].gsub( ',', '' ).to_f unless row[:out].nil?
141
+ row[:in] = row[:in].gsub( ',', '' ).to_f unless row[:in].nil?
142
+ row[:balance] = row[:balance].gsub( ',', '' ).to_f unless row[:balance].nil?
143
+
144
+ # set a change amount
145
+ row[:change] = if !row[:out].nil?
146
+ 0 - row[:out]
147
+ elsif !row[:in].nil?
148
+ row[:in]
149
+ else
150
+ nil
151
+ end
152
+
153
+ # return
154
+ row
155
+
156
+ end
157
157
 
158
- ( str.strip!.empty? ) ? nil : str
158
+ def empty_to_nil( str )
159
+
160
+ ( str.strip!.empty? ) ? nil : str
161
+
162
+ end
159
163
 
160
164
  end
161
165
 
@@ -1,3 +1,3 @@
1
1
  module HsbcPdfStatementParser
2
- VERSION = "1.0.0"
2
+ VERSION = "1.0.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hsbc_pdf_statement_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jon Pearse
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-11-24 00:00:00.000000000 Z
11
+ date: 2019-11-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: pdf-reader