ng-bank-parser 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/.rspec +2 -0
  4. data/CODE_OF_CONDUCT.md +13 -0
  5. data/Gemfile +4 -0
  6. data/Gemfile.lock +80 -0
  7. data/LICENSE +340 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +92 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/ng-bank-parser.rb +11 -0
  14. data/lib/ng-bank-parser/banks.rb +24 -0
  15. data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
  16. data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
  17. data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
  18. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
  19. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
  20. data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
  21. data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
  22. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
  23. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
  24. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
  25. data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
  26. data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
  27. data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
  28. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
  29. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
  30. data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
  31. data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
  32. data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
  33. data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
  34. data/lib/ng-bank-parser/router.rb +44 -0
  35. data/lib/ng-bank-parser/version.rb +3 -0
  36. data/ng-bank-parser.gemspec +31 -0
  37. metadata +198 -0
@@ -0,0 +1,81 @@
1
+ require 'pdf-reader'
2
+ require_relative 'firstbank-pdf-parser/helpers'
3
+
4
+
5
module NgBankParser
  # Parser for First Bank statements delivered as PDF files.
  #
  # The class is driven entirely through class-level methods. The PDF helper
  # methods are mixed in with `extend`, and the transactions of the statement
  # currently being parsed are collected in a class variable.
  class FirstbankPdf
    extend FirstbankPdfHelpers

    # File extensions accepted by .parse (compared case-sensitively).
    ACCEPTED_FORMATS = ['.pdf'].freeze

    @@transactions = []

    # Parses the statement stored at +path+.
    #
    # @param path [String] location of the PDF file
    # @param password [String, nil] password used when the PDF is encrypted
    # @return [Hash] the value built by +send_response+ on success, or the
    #   value built by +error_message+ when the file is rejected
    def self.parse(path, password = nil)
      unless ACCEPTED_FORMATS.include? File.extname(path)
        return error_message 'Invalid file format'
      end

      # Start every run with a fresh list; otherwise rows collected by an
      # earlier call would be returned again (and previously returned arrays
      # would keep growing).
      @@transactions = []

      if has_encryption? path
        unless password
          return error_message 'PDF File is encrypted and no password was supplied'
        end
        unless get_unlocked_pdf? path, password
          return error_message 'Password supplied for decryption is invalid.'
        end
      end

      unless contains_account_data?
        return error_message 'Unable to read account details'
      end

      unless contains_transactions_table?
        return error_message 'Could not find any transactions'
      end

      extract_transactions(clean(get_raw_transactions))

      data = {}
      data[:bank_name] = 'First Bank'
      data[:account_number] = get_account_number
      data[:account_name] = get_account_name
      data[:from_date] = get_from_date
      data[:to_date] = get_to_date
      data[:transactions] = @@transactions
      send_response data
    end

    # NOTE: the two helpers below are internal, but they remain publicly
    # callable. The original file placed a bare `private` here, which has no
    # effect on methods defined with `def self.`; visibility is left unchanged
    # so existing callers are not broken.

    # Converts cleaned statement rows into transaction hashes appended to
    # @@transactions. A row that is not recognised as a transaction row is
    # treated as the continuation of the previous transaction's remarks.
    #
    # @param jagged_array [Array<Array<String>>] rows split into columns
    # @return [Array<Array<String>>] the rows that were passed in
    def self.extract_transactions(jagged_array = [[]])
      jagged_array.each do |array|
        if is_transaction_row? array
          transaction = {}
          transaction[:ref] = ''
          transaction[:date] = Date.strptime(array[0], '%d-%b-%y')
          transaction[:remarks] = array[1]
          transaction[:amount] = array[3].delete(',').to_f
          transaction[:balance] = array[4].delete(',').to_f
          # NOTE(review): the comparison is done on whole units only (both
          # sides are truncated to Integer), so sub-unit movements are
          # classified as debits -- confirm this is intended.
          rising = transaction[:balance].to_i > get_last_balance
          transaction[:type] = rising ? 'credit' : 'debit'
          update_last_balance transaction[:balance]
          @@transactions << transaction
        elsif !@@transactions.empty?
          # An Array is always truthy, so the emptiness check is what actually
          # protects against a continuation line appearing before any
          # transaction row.
          @@transactions.last[:remarks] += array[0]
        end
      end
    end

    # Removes rows that +is_row_invalid?+ flags, modifying the array in place.
    #
    # @param jagged_array [Array<Array<String>>] raw rows
    # @return [Array<Array<String>>] the same array, always (never nil)
    def self.clean(jagged_array = [[]])
      # reject! returns nil when nothing was removed, so return the array
      # explicitly instead of the result of reject!.
      jagged_array.reject! { |array| is_row_invalid? array }
      jagged_array
    end
  end
end
@@ -0,0 +1,172 @@
1
+ require 'pdf-reader'
2
+ require 'date'
3
+ require 'open-uri'
4
+ require_relative 'statement_utils'
5
+ require_relative '../../pdf-unlocker.rb'
6
+
7
module NgBankParser
  # Helper methods for the First Bank PDF parser.
  #
  # All state (the PDF reader, the raw table rows and the account header
  # fields) is stored in class variables of this module, so it is shared by
  # everything that mixes the module in.
  module FirstbankPdfHelpers
    include StatementUtils

    @@pdf_reader = nil
    # The leading empty row is kept on purpose: callers that clean this array
    # with reject! rely on at least one row being rejected.
    @@raw_transactions = [[]]
    # Declared up front so reading them before a statement has been scanned
    # yields nil instead of raising NameError (uninitialized class variable).
    @@account_number = nil
    @@account_name = nil
    @@last_balance = nil
    @@statement_period = nil
    @@from_date = nil
    @@to_date = nil

    # Tries to open the PDF at +path+ and remembers the reader on success.
    #
    # @return [Boolean] true when PDF::Reader reports the file as encrypted
    def has_encryption? path
      begin
        @@pdf_reader = PDF::Reader.new(path)
        false
      rescue PDF::Reader::EncryptedPDFError
        true
      end
    end


    # Unlocks the PDF at +path+ through PDFUnlocker and loads the unlocked
    # content into the shared reader.
    #
    # @return [Boolean] false when no response was obtained or the response
    #   contains 'Unlock Failed', true otherwise
    def get_unlocked_pdf? path, password
      # Block form so the file handle is closed once the unlocker is done.
      response = File.open(path) do |file|
        PDFUnlocker.new(file, password).unlocked_pdf
      end
      return false unless response
      return false if response.include? 'Unlock Failed'

      pseudo_file = StringIO.new
      pseudo_file.write(response)
      @@pdf_reader = PDF::Reader.new(pseudo_file)
      true
    end


    # @return [Array<Array<String>>] rows collected by get_transaction_data
    def get_raw_transactions
      @@raw_transactions
    end


    # Collects, from every page, the lines that follow the transaction table
    # header and stores them (split into columns) as raw rows.
    def get_transaction_data
      # Forget rows gathered for a previously parsed statement.
      @@raw_transactions = [[]]
      pages = get_pages @@pdf_reader
      pages.each do |page|
        page_text = get_page_text page
        index = get_transaction_table_index page_text
        unless index == -1
          add_to_transactions page_text[index..-1]
        end
      end
    end


    # Reads account number, opening balance, account name and statement period
    # from the first page of the loaded PDF.
    def get_account_data
      # Reset first so a statement lacking a field cannot inherit the value of
      # a previously parsed one.
      @@account_number = nil
      @@account_name = nil
      @@last_balance = nil
      @@statement_period = nil
      @@from_date = nil
      @@to_date = nil

      lines = get_first_page_text @@pdf_reader
      lines.each do |line|
        # A whitespace-only line splits into an empty array, so line[0] may be nil.
        label = line[0].to_s
        if label.start_with? 'Account No:'
          set_account_number line
          set_last_balance line
        elsif label.start_with? 'Account Name:'
          set_account_name line
        elsif label.start_with? 'For the Period of:'
          set_statement_period line
        end
      end
    end


    def get_account_number
      @@account_number
    end


    def get_account_name
      @@account_name
    end


    # @return [Integer] the last known balance truncated to whole units
    def get_last_balance
      balance = @@last_balance
      # Amounts in the statement use thousands separators; String#to_i would
      # stop at the first comma ("1,234.56".to_i == 1).
      balance = balance.delete(',') if balance.is_a?(String)
      balance.to_i
    end

    # @return [Date] first day covered by the statement
    def get_from_date
      Date.strptime(@@from_date.strip,"%d-%b-%Y")
    end


    # @return [Date] last day covered by the statement
    def get_to_date
      Date.strptime(@@to_date.strip,"%d-%b-%Y")
    end


    # Gathers the raw rows and reports whether any of them looks like a
    # transaction.
    #
    # @return [Boolean]
    def contains_transactions_table?
      get_transaction_data
      # The row array itself is always truthy, so look for a real transaction row.
      @@raw_transactions.any? { |row| !row.empty? && is_transaction_row?(row) }
    end


    # Scans the first page and reports whether every account field was found.
    #
    # @return [Object, nil] truthy only when name, number, balance and period are all set
    def contains_account_data?
      get_account_data
      @@account_name && @@account_number && @@last_balance && @@statement_period
    end


    def set_account_number line
      @@account_number = line[1] unless blank_value?(line[1])
    end


    def set_account_name line
      @@account_name = line[1] unless blank_value?(line[1])
    end


    def set_last_balance line
      # Guard on the column that is actually stored.
      @@last_balance = line[2] unless blank_value?(line[2])
    end


    def update_last_balance balance
      @@last_balance = balance
    end


    # Splits the period column on 'to' into the from/to date strings.
    def set_statement_period line
      unless blank_value?(line[1])
        @@statement_period = line[1].split('to')
        @@from_date, @@to_date = @@statement_period
      end
    end


    # @return [Integer, nil] match position when the first column contains a
    #   dd-Mon-yy style date, nil otherwise
    def is_transaction_row? row
      row[0] =~ /(\d\d-[a-zA-Z]{3}-\d\d)/
    end


    # @return [Boolean] true for empty rows and for the footer / carried-forward
    #   rows that are not transactions
    def is_row_invalid? row
      row.length == 0 ||
        row[0].start_with?('END OF STATEMENT') ||
        row[0] == ('Balance B/F') ||
        row[0].start_with?('Page')
    end


    # @return [Hash] failure response: status 0 plus the given message
    def error_message msg
      return {
        status: 0,
        message: msg
      }
    end


    # @return [Hash] success response: status 1 plus the given data
    def send_response data
      return {
        status: 1,
        data: data
      }
    end


    private

    # Appends each line, split on runs of two or more whitespace characters,
    # to the raw rows.
    def add_to_transactions lines
      lines.each do |line|
        @@raw_transactions << line.strip.split(/\s\s+/)
      end
    end

    # Plain-Ruby stand-in for ActiveSupport's `blank?`, which this file does
    # not load: true for nil and for empty / whitespace-only values.
    def blank_value? value
      value.nil? || value.to_s.strip.empty?
    end

  end
end
@@ -0,0 +1,51 @@
1
module NgBankParser
  # Text-extraction helpers for statement PDFs. They only need objects that
  # respond to +pages+ (reader) and +text+ (page).
  module StatementUtils

    # Returns the non-empty lines of the first page, each split into columns
    # on runs of two or more whitespace characters.
    #
    # @param reader [#pages] object whose pages respond to +text+
    # @return [Array<Array<String>>]
    def get_first_page_text reader
      lines = remove_empty_lines(reader.pages.first.text).lines # lines without the empty ones
      lines.map { |line| split_on_2_or_more_spaces(line) }
    end


    # Returns the non-empty lines of every page, in page order, unsplit.
    #
    # @param reader [#pages]
    # @return [Array<String>]
    def get_all_text reader
      all_lines = []
      reader.pages.each do |page|
        all_lines += remove_empty_lines(page.text).lines
      end
      all_lines
    end


    # Returns the non-empty lines of a single page.
    #
    # @param page_text [#text] a page object (despite the parameter name)
    # @return [Array<String>]
    def get_page_text page_text
      remove_empty_lines(page_text.text).lines
    end


    # @return [Object] whatever +reader.pages+ returns
    def get_pages reader
      reader.pages
    end


    # Finds the line whose first column is 'TransDate' (the table header).
    #
    # @param lines [Array<String>] lines of one page
    # @return [Integer] index of the line following the header, or -1 when the
    #   page has no transaction table
    def get_transaction_table_index lines
      lines.each_with_index do |line, index|
        return index + 1 if split_on_2_or_more_spaces(line)[0] == 'TransDate'
      end
      -1 # no transactions found on page
    end


    private

    # Strips the string and splits it on runs of two or more whitespace characters.
    def split_on_2_or_more_spaces str
      str.strip.split(/\s\s+/)
    end

    # Removes empty lines from +text+.
    #
    # The text is passed in explicitly because this method lives on the
    # module, not on String; calling it on a String only works if String has
    # been patched elsewhere. The zero-argument form (operating on self) is
    # kept for backward compatibility.
    def remove_empty_lines(text = self)
      text.gsub(/^$\n/, '')
    end
  end
end
@@ -0,0 +1,66 @@
1
+ require_relative 'gtb-excel-parser/helpers'
2
+
3
module NgBankParser
  # Parser for Guaranty Trust Bank statements exported as .xls / .xlsx.
  # File reading and row extraction come from GtbExcelHelpers (mixed in with
  # `extend`); this class assembles the response.
  class GtbExcel
    extend GtbExcelHelpers

    class << self
      # Parses the statement at +path+.
      #
      # @param path [String] location of the statement file
      # @param password [String, nil] accepted but not used by this parser
      # @return [Hash] success or failure response
      def parse(path, password = nil)
        extension = File.extname(path)
        return error_message("Invalid file format") unless [".xls", ".xlsx"].include?(extension)

        file = read_file_contents(path)
        case file[:type]
        when "html" then html_parse(file[:contents])
        when "xls"  then xls_parse(file[:contents])
        else error_message("Could not parse this file")
        end
      end

      # Builds the response from a statement that was read as an HTML document.
      #
      # @param file [Object] parsed document responding to +css+
      # @return [Hash] success response
      def html_parse(file)
        data = {
          transactions: get_transactions_from_html(file),
          account_number: file.css("#lblAcctNo").text.return_first_number,
          from_date: file.css("#lblPeriod1").text.convert_string_to_date,
          to_date: file.css("#lblPeriod2").text.convert_string_to_date,
          account_name: file.css("#lblAcctName").text,
          bank_name: "Guaranty Trust Bank"
        }

        send_response(data)
      end

      # Builds the response from a statement that was read as a spreadsheet.
      # Account number, period and account name are taken from the first
      # cell of rows 10, 14 and 5 respectively.
      #
      # @param file [Object] spreadsheet responding to +row+ and +each+
      # @return [Hash] success response
      def xls_parse(file)
        transactions = get_transactions_from_excel(file)
        account_number = file.row(10)[0].return_first_number
        from_text, to_text = file.row(14)[0].get_date_strings

        data = {
          transactions: transactions,
          account_number: account_number,
          from_date: from_text.convert_string_to_date,
          to_date: to_text.convert_string_to_date,
          account_name: file.row(5)[0],
          bank_name: "Guaranty Trust Bank"
        }

        send_response(data)
      end

      # @return [Hash] failure response: status 0 plus the given text
      def error_message(text)
        { status: 0, message: text }
      end

      # @return [Hash] success response: status 1 plus the given data
      def send_response(data)
        { status: 1, data: data }
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ require 'nokogiri'
2
+ require 'roo'
3
+ require 'date'
4
+
5
class Hash
  # Returns a shallow copy of the hash with the given keys removed.
  # The receiver itself is left untouched; keys that are absent are ignored.
  #
  # @param keys [Array<Object>] keys to drop from the copy
  # @return [Hash]
  def without(*keys)
    keys.each_with_object(dup) { |key, copy| copy.delete(key) }
  end
end
12
+
13
class String
  # Extracts the number contained in the string, ignoring thousands
  # separators and any other non-numeric text.
  #
  # The digit groups (with an optional leading minus) are joined and converted
  # with to_f, so "-1,234.56" becomes -1234.56 and a string with no digits
  # becomes 0.0. The minus is matched directly: anchoring it with a word
  # boundary (\b) can never match a sign at the start of the value, which
  # silently turned negative amounts positive.
  #
  # @return [Float]
  def convert_to_number
    scan(/-?[\d.]+/).join.to_f
  end

  # @return [String, nil] the first run of digits in the string, or nil
  def return_first_number
    self[/\d+/]
  end

  # Parses the first date-looking fragment (see get_date_strings) as a
  # day/abbreviated-month/year date such as "01/Jan/2015".
  #
  # @return [Date]
  # @raise [TypeError] when the string contains no matching fragment
  # @raise [ArgumentError] when the fragment is not a valid %d/%b/%Y date
  def convert_string_to_date
    date_string = get_date_strings[0]
    Date.strptime(date_string, "%d/%b/%Y")
  end

  # @return [Array<String>] every fragment shaped like five characters,
  #   optional digits, two characters and a four-digit year
  def get_date_strings
    scan(/.....\d*..\d{4}/)
  end
end
28
+
29
+
30
# Reading and row-extraction helpers for Guaranty Trust Bank statements,
# which arrive either as an HTML document saved with an .xls extension or as
# a real spreadsheet.
module GtbExcelHelpers
  # Column name => XPath (relative to a table row) of the cell text.
  HTML_TRANSACTION_COLUMNS = [
    [:date,    'td[1]/text()'],
    [:ref,     'td[2]/text()'],
    [:debit,   'td[4]/text()'],
    [:credit,  'td[5]/text()'],
    [:balance, 'td[6]/text()'],
    [:remarks, 'td[7]/text()']
  ].freeze

  # Columns whose text is converted to a number.
  NUMERIC_COLUMNS = [:debit, :credit, :balance].freeze

  # Loads the statement at +path+.
  #
  # @param path [String] statement location
  # @return [Hash] `{type: "html", contents: document}` for an .xls file that
  #   contains the transactions table, `{type: "unknown"}` for an .xls file
  #   that does not, `{type: "xls", contents: spreadsheet}` for anything else
  def read_file_contents(path)
    if File.extname(path) == '.xls'
      # Block form closes the handle as soon as the document has been parsed.
      # NOTE(review): Kernel#open treats a path starting with "|" as a command
      # to run; make sure +path+ never comes from untrusted input.
      contents = open(path) { |io| Nokogiri::HTML(io) }
      if has_transactions_table(contents)
        { type: "html", contents: contents }
      else
        { type: "unknown" }
      end
    else
      contents = Roo::Excelx.new(path)
      { type: "xls", contents: contents }
    end
  end

  # @param contents [#css] parsed document
  # @return [Boolean] whether an element with id "dgtrans" exists
  def has_transactions_table(contents)
    # The node collection itself is truthy even when empty, so test emptiness.
    !contents.css("#dgtrans").empty?
  end

  # @param file [Object] parsed HTML document
  # @return [Array<Hash>] valid transactions found in the document
  def get_transactions_from_html(file)
    extract_transaction_rows_from_html(file)
    convert_html_rows_to_transactions
  end

  # Stores the rows of the transactions table in @rows, minus the header row.
  #
  # @return [Object, nil] the removed header row
  def extract_transaction_rows_from_html(file)
    @rows = file.xpath('//table[@id="dgtrans"]/tr') # Get transaction table
    @rows.shift # Remove header row
  end

  # Turns the rows stored by extract_transaction_rows_from_html into
  # transaction hashes and drops the ones that are not valid transactions.
  #
  # @return [Array<Hash>]
  def convert_html_rows_to_transactions
    transactions = @rows.map do |row|
      transaction = {}
      HTML_TRANSACTION_COLUMNS.each do |column_name, xpath|
        # An empty cell has no text node; treat it as an empty string.
        node = row.at_xpath(xpath)
        column_value = node ? node.text : ''

        column_value = column_value.convert_to_number if NUMERIC_COLUMNS.include?(column_name)
        column_value = parse_html_date(column_value) if column_name == :date

        transaction[column_name] = column_value
      end

      filter_debit_or_credit(transaction)
    end
    filter_invalid(transactions)
  end

  # @return [Array<Hash>] only the rows accepted by is_valid_transaction
  def filter_invalid(transactions)
    transactions.select { |row| is_valid_transaction(row) }
  end

  # Adds :type and :amount to +transaction+ (a missing or zero debit means
  # the row is a credit) and returns a copy without :debit and :credit.
  #
  # @param transaction [Hash] row with :debit and :credit entries
  # @return [Hash]
  def filter_debit_or_credit(transaction)
    if transaction[:debit].nil? || transaction[:debit] == 0
      transaction[:type] = "credit"
      transaction[:amount] = transaction[:credit]
    else
      transaction[:type] = "debit"
      transaction[:amount] = transaction[:debit]
    end

    # Remove credit and debit keys
    transaction.reject { |key, _value| key == :debit || key == :credit }
  end

  # @param file [#each] spreadsheet yielding one hash per row for the
  #   requested column headers
  # @return [Array<Hash>] valid transactions, with :ref converted to String
  def get_transactions_from_excel(file)
    transactions = []
    file.each(date: 'Trans Date', ref: 'Reference', debit: 'Debit', credit: 'Credit', balance: 'Balance', remarks: 'Remarks') do |row|
      next unless is_valid_transaction(row)

      transaction = filter_debit_or_credit(row)
      transaction[:ref] = transaction[:ref].to_s
      transactions << transaction
    end
    transactions
  end

  # A row is a transaction when it has a Date, a numeric amount (or credit /
  # debit), a numeric balance and non-empty remarks.
  #
  # @return [Boolean]
  def is_valid_transaction(row)
    amount = row[:amount] || row[:credit] || row[:debit]
    # Numeric rather than Float so whole-number cell values are accepted;
    # to_s so missing remarks reject the row instead of raising.
    row[:date].is_a?(Date) && amount.is_a?(Numeric) && row[:balance].is_a?(Numeric) && !row[:remarks].to_s.empty?
  end

  private

  # Parses a dd-Mon-yyyy date. Text that is not a date is returned unchanged
  # so that is_valid_transaction can discard the row instead of the whole
  # statement failing.
  def parse_html_date(text)
    Date.strptime(text, "%d-%b-%Y")
  rescue ArgumentError
    text
  end
end