ng-bank-parser 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/.rspec +2 -0
  4. data/CODE_OF_CONDUCT.md +13 -0
  5. data/Gemfile +4 -0
  6. data/Gemfile.lock +80 -0
  7. data/LICENSE +340 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +92 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/ng-bank-parser.rb +11 -0
  14. data/lib/ng-bank-parser/banks.rb +24 -0
  15. data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
  16. data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
  17. data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
  18. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
  19. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
  20. data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
  21. data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
  22. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
  23. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
  24. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
  25. data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
  26. data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
  27. data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
  28. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
  29. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
  30. data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
  31. data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
  32. data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
  33. data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
  34. data/lib/ng-bank-parser/router.rb +44 -0
  35. data/lib/ng-bank-parser/version.rb +3 -0
  36. data/ng-bank-parser.gemspec +31 -0
  37. metadata +198 -0
@@ -0,0 +1,81 @@
1
+ require 'pdf-reader'
2
+ require_relative 'firstbank-pdf-parser/helpers'
3
+
4
+
5
module NgBankParser
  # Parses First Bank PDF statements into a plain response hash.
  #
  # Everything is exposed as class methods. File access, account metadata and
  # the response builders (+error_message+, +send_response+) come from
  # FirstbankPdfHelpers, which is mixed in with +extend+.
  class FirstbankPdf
    extend FirstbankPdfHelpers

    # Transactions collected by the most recent call to +parse+.
    @@transactions = []

    # Parses the statement stored at +path+.
    #
    # path     - String path of the statement; the extension must be ".pdf"
    #            (compared case-insensitively).
    # password - optional String used to unlock an encrypted PDF.
    #
    # Returns the hash built by +send_response+ (bank name, account number,
    # account name, period dates and transactions) on success, or the hash
    # built by +error_message+ when the file cannot be parsed.
    def self.parse(path, password = nil)
      # Start from a fresh array on every call. A new object (rather than
      # +clear+) is used on purpose so that the transactions handed out by an
      # earlier call are not emptied behind the caller's back.
      @@transactions = []

      unless ['.pdf'].include?(File.extname(path).downcase)
        return error_message('Invalid file format')
      end

      if has_encryption?(path)
        unless password
          return error_message('PDF File is encrypted and no password was supplied')
        end
        unless get_unlocked_pdf?(path, password)
          return error_message('Password supplied for decryption is invalid.')
        end
      end

      return error_message('Unable to read account details') unless contains_account_data?
      return error_message('Could not find any transactions') unless contains_transactions_table?

      extract_transactions(clean(get_raw_transactions))

      send_response(
        bank_name: 'First Bank',
        account_number: get_account_number,
        account_name: get_account_name,
        from_date: get_from_date,
        to_date: get_to_date,
        transactions: @@transactions
      )
    end

    # NOTE: the original file placed a bare `private` here, which has no
    # effect on `def self.` methods. The two helpers below therefore were, and
    # remain, publicly callable so that existing callers keep working.

    # Converts cleaned table rows into transaction hashes and appends them to
    # the class-level transaction list.
    #
    # jagged_array - Array of rows, each row an Array of String cells. A row
    #                whose first cell looks like a date (see
    #                +is_transaction_row?+) starts a new transaction and is
    #                read as date, remarks, (unused cell), amount, balance.
    #                Any other row is treated as a continuation of the
    #                previous transaction's remarks.
    #
    # A transaction is tagged 'credit' when its whole-number balance is higher
    # than the previously recorded balance and 'debit' otherwise.
    def self.extract_transactions(jagged_array = [[]])
      jagged_array.each do |array|
        if is_transaction_row?(array)
          balance = array[4].delete(',').to_f
          transaction = {
            ref: '',
            date: Date.strptime(array[0], '%d-%b-%y'),
            remarks: array[1],
            amount: array[3].delete(',').to_f,
            balance: balance,
            type: balance.to_i > get_last_balance ? 'credit' : 'debit'
          }
          update_last_balance(balance)
          @@transactions << transaction
        elsif !@@transactions.empty?
          # Continuation line: glue it onto the previous remarks. Rows that
          # appear before any transaction are ignored instead of crashing on
          # +nil+ (an Array is always truthy, so the old `if @@transactions`
          # guard never protected against the empty case).
          previous = @@transactions.last
          previous[:remarks] = "#{previous[:remarks]}#{array[0]}"
        end
      end
    end

    # Removes, in place, every row that +is_row_invalid?+ flags and returns
    # the same array.
    #
    # +delete_if+ is used instead of +reject!+ because +reject!+ returns nil
    # when nothing was removed, which would make the caller iterate over nil.
    def self.clean(jagged_array = [[]])
      jagged_array.delete_if { |array| is_row_invalid?(array) }
    end
  end
end
@@ -0,0 +1,172 @@
1
+ require 'pdf-reader'
2
+ require 'date'
3
+ require 'open-uri'
4
+ require_relative 'statement_utils'
5
+ require_relative '../../pdf-unlocker.rb'
6
+
7
module NgBankParser
  # Helper mixin for the First Bank PDF parser: opens (and if needed unlocks)
  # the PDF, pulls account metadata from the first page, collects the raw
  # transaction-table rows and builds the response hashes.
  #
  # State is kept in class variables of this module, so it is shared by every
  # user of the mixin and is not safe for concurrent parses.
  module FirstbankPdfHelpers
    include StatementUtils

    @@pdf_reader = nil
    @@raw_transactions = [[]]
    # Declared up front: reading a class variable that was never assigned
    # raises NameError, which used to surface instead of the intended
    # "Unable to read account details" response.
    @@account_number = nil
    @@account_name = nil
    @@last_balance = nil
    @@statement_period = nil
    @@from_date = nil
    @@to_date = nil

    # Tries to open the PDF at +path+ and remembers the reader.
    #
    # Returns true when PDF::Reader reports the file as encrypted, false when
    # it opened normally. Other PDF::Reader errors are not rescued here.
    def has_encryption?(path)
      @@pdf_reader = PDF::Reader.new(path)
      false
    rescue PDF::Reader::EncryptedPDFError
      true
    end

    # Unlocks the PDF at +path+ with +password+ through PDFUnlocker and, on
    # success, points the shared reader at the unlocked bytes.
    #
    # Returns true when the PDF was unlocked and loaded, false when the
    # unlocker returned nothing or a response containing 'Unlock Failed'.
    def get_unlocked_pdf?(path, password)
      # Block form so the file handle is closed once the unlocker is done.
      response = File.open(path) { |file| PDFUnlocker.new(file, password).unlocked_pdf }
      return false unless response
      return false if response.include?('Unlock Failed')

      pseudo_file = StringIO.new
      pseudo_file.write(response)
      pseudo_file.rewind # let any consumer start reading at the first byte
      @@pdf_reader = PDF::Reader.new(pseudo_file)
      true
    end

    # Returns the raw table rows collected by +get_transaction_data+.
    def get_raw_transactions
      @@raw_transactions
    end

    # Walks every page of the current PDF and collects all lines that follow
    # the transaction-table header on that page.
    def get_transaction_data
      # Restart from the original seed value so rows from an earlier parse do
      # not leak into this one. The single empty row is kept because it is the
      # value this variable has always started with; it is flagged by
      # +is_row_invalid?+ and removed by the caller's clean-up step.
      @@raw_transactions = [[]]

      get_pages(@@pdf_reader).each do |page|
        page_text = get_page_text(page)
        index = get_transaction_table_index(page_text)
        add_to_transactions(page_text[index..-1]) unless index == -1
      end
    end

    # Reads account number, opening balance, account name and statement
    # period from the first page of the current PDF.
    def get_account_data
      # Forget whatever a previous statement left behind.
      @@account_number = nil
      @@account_name = nil
      @@last_balance = nil
      @@statement_period = nil
      @@from_date = nil
      @@to_date = nil

      get_first_page_text(@@pdf_reader).each do |line|
        if line[0].start_with?('Account No:')
          set_account_number(line)
          set_last_balance(line)
        elsif line[0].start_with?('Account Name:')
          set_account_name(line)
        elsif line[0].start_with?('For the Period of:')
          set_statement_period(line)
        end
      end
    end

    # Returns the account number read from the statement, or nil.
    def get_account_number
      @@account_number
    end

    # Returns the account name read from the statement, or nil.
    def get_account_name
      @@account_name
    end

    # Returns the last recorded balance truncated to an Integer (0 when no
    # balance is known).
    def get_last_balance
      balance = @@last_balance
      # The opening balance arrives as text from the PDF. Strip thousands
      # separators first, otherwise "1,234.56".to_i would stop at the comma
      # and yield 1.
      balance = balance.delete(',') if balance.is_a?(String)
      balance.to_i
    end

    # Returns the start of the statement period as a Date.
    def get_from_date
      Date.strptime(@@from_date.strip, "%d-%b-%Y")
    end

    # Returns the end of the statement period as a Date.
    def get_to_date
      Date.strptime(@@to_date.strip, "%d-%b-%Y")
    end

    # Collects the raw rows and reports whether at least one of them is a
    # transaction row. (The collected array itself is always truthy, so it
    # cannot serve as the answer.)
    def contains_transactions_table?
      get_transaction_data
      @@raw_transactions.any? { |row| is_transaction_row?(row) }
    end

    # Reads the account metadata and returns a truthy value only when name,
    # number, opening balance and statement period were all found.
    def contains_account_data?
      get_account_data
      @@account_name && @@account_number && @@last_balance && @@statement_period
    end

    # Stores the second cell of +line+ as the account number, if present.
    def set_account_number(line)
      @@account_number = line[1] unless blank_value?(line[1])
    end

    # Stores the second cell of +line+ as the account name, if present.
    def set_account_name(line)
      @@account_name = line[1] unless blank_value?(line[1])
    end

    # Stores the third cell of +line+ as the opening balance, if present.
    # The guard looks at the cell that is actually stored (it used to test
    # the second cell while storing the third).
    def set_last_balance(line)
      @@last_balance = line[2] unless blank_value?(line[2])
    end

    # Replaces the last recorded balance with +balance+.
    def update_last_balance(balance)
      @@last_balance = balance
    end

    # Splits the second cell of +line+ on 'to' and stores the two halves as
    # the raw from/to date strings.
    def set_statement_period(line)
      return if blank_value?(line[1])

      @@statement_period = line[1].split('to')
      @@from_date, @@to_date = @@statement_period
    end

    # Truthy when the first cell of +row+ contains a dd-Mon-yy style date.
    def is_transaction_row?(row)
      row[0] =~ /(\d\d-[a-zA-Z]{3}-\d\d)/
    end

    # True for rows that carry no transaction information: empty rows, the
    # 'END OF STATEMENT' marker, the 'Balance B/F' row and page footers.
    def is_row_invalid?(row)
      row.length == 0 ||
        row[0].start_with?('END OF STATEMENT') ||
        row[0] == 'Balance B/F' ||
        row[0].start_with?('Page')
    end

    # Builds the failure response: status 0 plus the given message.
    def error_message(msg)
      { status: 0, message: msg }
    end

    # Builds the success response: status 1 plus the given data.
    def send_response(data)
      { status: 1, data: data }
    end

    private

    # Splits each line on runs of two or more whitespace characters and
    # appends the resulting cells as one raw row.
    def add_to_transactions(lines)
      lines.each do |line|
        @@raw_transactions << line.strip.split(/\s\s+/)
      end
    end

    # Core-Ruby stand-in for ActiveSupport's +blank?+: true for nil and for
    # strings that are empty or whitespace-only.
    def blank_value?(value)
      value.nil? || value.to_s.strip.empty?
    end
  end
end
@@ -0,0 +1,51 @@
1
module NgBankParser
  # Text-extraction helpers shared by PDF statement parsers.
  #
  # The +reader+ arguments are expected to respond to +pages+, and each page
  # to +text+ (as PDF::Reader objects do). Blank lines are removed with this
  # module's own private helper, so the module no longer needs a
  # String#remove_empty_lines extension to be loaded from somewhere else.
  module StatementUtils
    # Returns the non-empty lines of the first page, each split into cells on
    # runs of two or more whitespace characters (Array of Array of String).
    def get_first_page_text(reader)
      lines = remove_empty_lines(reader.pages.first.text).lines
      lines.map { |line| split_on_2_or_more_spaces(line) }
    end

    # Returns the non-empty lines of every page as one flat Array of String.
    def get_all_text(reader)
      reader.pages.flat_map { |page| remove_empty_lines(page.text).lines }
    end

    # Returns the non-empty lines of a single page. Despite its name,
    # +page_text+ is the page object, not a String.
    def get_page_text(page_text)
      remove_empty_lines(page_text.text).lines
    end

    # Returns the reader's pages.
    def get_pages(reader)
      reader.pages
    end

    # Locates the transaction table in +lines+.
    #
    # Returns the index of the line right after the first line whose first
    # cell is 'TransDate', or -1 when the page has no such header.
    def get_transaction_table_index(lines)
      header = lines.index { |line| split_on_2_or_more_spaces(line)[0] == 'TransDate' }
      header ? header + 1 : -1
    end

    private

    # Strips surrounding whitespace and splits on runs of 2+ whitespace chars.
    def split_on_2_or_more_spaces(str)
      str.strip.split(/\s\s+/)
    end

    # Returns +text+ without its empty lines. The argument defaults to the
    # receiver so a bare call keeps its previous meaning.
    def remove_empty_lines(text = self)
      text.gsub(/^$\n/, '')
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require_relative 'gtb-excel-parser/helpers'
2
+
3
module NgBankParser
  # Parser for Guaranty Trust Bank statements delivered as ".xls" or ".xlsx"
  # files. File reading and row extraction come from GtbExcelHelpers, which is
  # mixed in with +extend+.
  class GtbExcel
    extend GtbExcelHelpers

    class << self
      # Parses the statement at +path+.
      #
      # path     - String path; the extension must be ".xls" or ".xlsx".
      # password - accepted for signature parity with the other parsers; it is
      #            not used here.
      #
      # Returns the success hash from +send_response+, or the failure hash
      # from +error_message+ when the extension or content is not supported.
      def parse(path, password = nil)
        extension = File.extname(path)
        return error_message("Invalid file format") unless [".xls", ".xlsx"].include?(extension)

        file = read_file_contents(path)
        case file[:type]
        when "html" then html_parse(file[:contents])
        when "xls" then xls_parse(file[:contents])
        else error_message("Could not parse this file")
        end
      end

      # Builds the response from an HTML statement document, reading the
      # account details from the elements with the lblAcctNo, lblPeriod1,
      # lblPeriod2 and lblAcctName ids.
      def html_parse(file)
        label_text = ->(selector) { file.css(selector).text }

        send_response(
          transactions: get_transactions_from_html(file),
          account_number: label_text.call("#lblAcctNo").return_first_number,
          from_date: label_text.call("#lblPeriod1").convert_string_to_date,
          to_date: label_text.call("#lblPeriod2").convert_string_to_date,
          account_name: label_text.call("#lblAcctName"),
          bank_name: "Guaranty Trust Bank"
        )
      end

      # Builds the response from a spreadsheet, reading the account details
      # from the first cell of rows 10 (number), 14 (period) and 5 (name).
      def xls_parse(file)
        first_cell = ->(row_number) { file.row(row_number)[0] }

        data = {
          transactions: get_transactions_from_excel(file),
          account_number: first_cell.call(10).return_first_number
        }
        from_text, to_text = first_cell.call(14).get_date_strings
        data[:from_date] = from_text.convert_string_to_date
        data[:to_date] = to_text.convert_string_to_date
        data[:account_name] = first_cell.call(5)
        data[:bank_name] = "Guaranty Trust Bank"

        send_response(data)
      end

      # Failure response: status 0 plus the given message.
      def error_message(text)
        { status: 0, message: text }
      end

      # Success response: status 1 plus the given data.
      def send_response(data)
        { status: 1, data: data }
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ require 'nokogiri'
2
+ require 'roo'
3
+ require 'date'
4
+
5
class Hash
  # Returns a shallow copy of the hash with the given keys removed.
  # The receiver is left untouched; keys that are not present are ignored.
  def without(*keys)
    keys.each_with_object(dup) { |key, copy| copy.delete(key) }
  end
end
12
+
13
class String
  # Extracts the number contained in the string as a Float.
  #
  # Every run of digits and dots (optionally preceded by a minus sign) is
  # collected and the pieces are joined before conversion, which is how
  # thousands separators such as the commas in "1,234.50" are dropped.
  # The former pattern required a word boundary *before* the minus sign, so a
  # leading "-" could never match and negative amounts came back positive.
  #
  # Returns 0.0 when the string contains no digits.
  def convert_to_number
    scan(/-?[\d.]+/).join.to_f
  end

  # Returns the first run of digits in the string, or nil when there is none.
  def return_first_number
    scan(/\d+/)[0]
  end

  # Parses the first date-like fragment (see +get_date_strings+) using the
  # "%d/%b/%Y" format, e.g. "01/Jan/2015", and returns a Date.
  # Raises when the string holds no such fragment or it is not a valid date.
  def convert_string_to_date
    Date.strptime(get_date_strings[0], "%d/%b/%Y")
  end

  # Returns every fragment of the string that matches the loose date pattern
  # used by this parser (some leading characters followed by a 4-digit year).
  def get_date_strings
    scan(/.....\d*..\d{4}/)
  end
end
28
+
29
+
30
# Helpers for reading Guaranty Trust Bank statements, which come either as an
# HTML document saved with an ".xls" extension or as a real ".xlsx" workbook.
module GtbExcelHelpers
  # XPath (relative to a table row) of each transaction column in the HTML
  # statement, in output order.
  HTML_COLUMN_XPATHS = {
    date: 'td[1]/text()',
    ref: 'td[2]/text()',
    debit: 'td[4]/text()',
    credit: 'td[5]/text()',
    balance: 'td[6]/text()',
    remarks: 'td[7]/text()'
  }.freeze

  # Columns whose text is converted to a number.
  NUMERIC_COLUMNS = [:debit, :credit, :balance].freeze

  # Loads the statement at +path+.
  #
  # Returns a Hash: { type: "html", contents: <Nokogiri document> } for an
  # ".xls" file that contains the transactions table, { type: "unknown" } for
  # an ".xls" file that does not, and { type: "xls", contents: <Roo workbook> }
  # for every other extension.
  def read_file_contents(path)
    unless File.extname(path) == '.xls'
      return { type: "xls", contents: Roo::Excelx.new(path) }
    end

    # File.open (block form) instead of Kernel#open: the handle is closed
    # after parsing, and a path beginning with "|" can no longer be
    # interpreted as a command to run.
    contents = File.open(path) { |file| Nokogiri::HTML(file) }
    return { type: "unknown" } unless has_transactions_table(contents)

    { type: "html", contents: contents }
  end

  # True when the document has at least one element with id "dgtrans".
  # (The node set returned by +css+ is truthy even when empty, so it has to
  # be checked for emptiness explicitly.)
  def has_transactions_table(contents)
    !contents.css("#dgtrans").empty?
  end

  # Returns the valid transactions found in the HTML statement +file+.
  def get_transactions_from_html(file)
    extract_transaction_rows_from_html(file)
    convert_html_rows_to_transactions
  end

  # Stores the rows of the "dgtrans" table in @rows and drops the header row.
  def extract_transaction_rows_from_html(file)
    @rows = file.xpath('//table[@id="dgtrans"]/tr') # Get transaction table
    @rows.shift # Remove header row
  end

  # Turns the rows stored in @rows into transaction hashes and returns only
  # the valid ones.
  #
  # A cell without a text node is read as an empty string, and a date that
  # cannot be parsed is kept as text; in both cases the row then fails
  # +is_valid_transaction+ and is dropped instead of aborting the whole parse.
  def convert_html_rows_to_transactions
    transactions = @rows.map do |row|
      transaction = {}
      HTML_COLUMN_XPATHS.each do |column_name, xpath|
        node = row.at_xpath(xpath)
        text = node ? node.text : ''
        transaction[column_name] = parse_html_cell(column_name, text)
      end
      filter_debit_or_credit(transaction)
    end
    filter_invalid(transactions)
  end

  # Returns the rows that pass +is_valid_transaction+.
  def filter_invalid(transactions)
    transactions.select { |row| is_valid_transaction(row) }
  end

  # Tags +transaction+ (in place) as "credit" when its debit is nil or zero
  # and as "debit" otherwise, copies the matching value into :amount, and
  # returns a copy without the :debit and :credit keys.
  def filter_debit_or_credit(transaction)
    if transaction[:debit].nil? || transaction[:debit] == 0
      transaction[:type] = "credit"
      transaction[:amount] = transaction[:credit]
    else
      transaction[:type] = "debit"
      transaction[:amount] = transaction[:debit]
    end

    # Remove credit and debit keys
    transaction.without(:debit, :credit)
  end

  # Returns the valid transactions found in the workbook +file+, matching
  # columns by their header captions. References are normalised to String.
  def get_transactions_from_excel(file)
    transactions = []
    file.each(date: 'Trans Date', ref: 'Reference', debit: 'Debit', credit: 'Credit', balance: 'Balance', remarks: 'Remarks') do |row|
      next unless is_valid_transaction(row)

      transaction = filter_debit_or_credit(row)
      transaction[:ref] = transaction[:ref].to_s
      transactions << transaction
    end
    transactions
  end

  # True when +row+ has a Date in :date, a Float amount (:amount, or else
  # :credit, or else :debit), a Float :balance and non-empty :remarks.
  # Missing (nil) or non-String remarks no longer raise.
  def is_valid_transaction(row)
    amount = row[:amount] || row[:credit] || row[:debit]
    remarks = row[:remarks]

    row[:date].is_a?(Date) &&
      amount.is_a?(Float) &&
      row[:balance].is_a?(Float) &&
      !remarks.nil? && !remarks.to_s.empty?
  end

  private

  # Converts the text of one HTML cell: numeric columns become Floats, the
  # date column becomes a Date when it parses as "%d-%b-%Y" (and stays text
  # otherwise), everything else is returned unchanged.
  def parse_html_cell(column_name, text)
    return text.convert_to_number if NUMERIC_COLUMNS.include?(column_name)
    return text unless column_name == :date

    begin
      Date.strptime(text, "%d-%b-%Y")
    rescue ArgumentError
      text
    end
  end
end