ng-bank-parser 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/.rspec +2 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +80 -0
- data/LICENSE +340 -0
- data/LICENSE.txt +21 -0
- data/README.md +92 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/ng-bank-parser.rb +11 -0
- data/lib/ng-bank-parser/banks.rb +24 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
- data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
- data/lib/ng-bank-parser/router.rb +44 -0
- data/lib/ng-bank-parser/version.rb +3 -0
- data/ng-bank-parser.gemspec +31 -0
- metadata +198 -0
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require_relative 'firstbank-pdf-parser/helpers'
|
3
|
+
|
4
|
+
|
5
|
+
module NgBankParser
  # Parses First Bank PDF statements into a status/data hash.
  #
  # Everything is exposed as class methods. PDF access, account-detail lookup
  # and response building come from the FirstbankPdfHelpers module this class
  # extends.
  class FirstbankPdf
    extend FirstbankPdfHelpers

    # Transactions collected by the most recent call to .parse.
    @@transactions = []

    # Parse the statement stored at +path+.
    #
    # @param path [String] path to the statement; its extension must be ".pdf"
    # @param password [String, nil] password to use when the PDF is encrypted
    # @return [Hash] the value of send_response(data) on success, otherwise the
    #   value of error_message(reason)
    def self.parse(path, password = nil)
      accepted_formats = ['.pdf']
      unless accepted_formats.include? File.extname(path)
        return error_message 'Invalid file format'
      end

      if has_encryption? path
        unless password
          return error_message 'PDF File is encrypted and no password was supplied'
        end
        unless get_unlocked_pdf? path, password
          return error_message 'Password supplied for decryption is invalid.'
        end
      end

      unless contains_account_data?
        return error_message 'Unable to read account details'
      end

      unless contains_transactions_table?
        return error_message 'Could not find any transactions'
      end

      # Start from a fresh list so rows from a previously parsed statement are
      # not returned again (and earlier results are not mutated).
      @@transactions = []
      extract_transactions(clean(get_raw_transactions))

      data = {}
      data[:bank_name] = 'First Bank'
      data[:account_number] = get_account_number
      data[:account_name] = get_account_name
      data[:from_date] = get_from_date
      data[:to_date] = get_to_date
      data[:transactions] = @@transactions
      send_response data
    end

    # NOTE: the original file placed a bare `private` here. `private` does not
    # apply to methods defined with `def self.`, so the two methods below have
    # always been public; they are left public to keep the interface unchanged.

    # Turn cleaned statement rows into transaction hashes and append them to
    # the class-level transaction list.
    #
    # A row accepted by is_transaction_row? is read positionally: index 0 is
    # the date ("%d-%b-%y"), 1 the remarks, 3 the amount and 4 the balance.
    # Any other row is treated as a continuation of the previous transaction's
    # remarks.
    #
    # The credit/debit decision compares whole-number balances (both sides are
    # truncated with to_i), so a movement smaller than one unit is reported as
    # a debit.
    #
    # @param jagged_array [Array<Array<String>>] rows split into columns
    # @return [Array<Array<String>>] the rows that were passed in
    def self.extract_transactions(jagged_array = [[]])
      jagged_array.each do |array|
        if is_transaction_row? array
          transaction = {}
          transaction[:ref] = ''
          transaction[:date] = Date.strptime(array[0], '%d-%b-%y')
          transaction[:remarks] = array[1]
          transaction[:amount] = array[3].delete(',').to_f
          transaction[:balance] = array[4].delete(',').to_f
          transaction[:type] =
            transaction[:balance].to_i > get_last_balance ? 'credit' : 'debit'
          update_last_balance transaction[:balance]
          @@transactions << transaction
        elsif !@@transactions.empty?
          # An Array is always truthy, so the list has to be tested for
          # emptiness explicitly before touching its last element.
          @@transactions.last[:remarks] += array[0].to_s
        end
      end
    end

    # Remove, in place, every row that is_row_invalid? flags.
    #
    # @param jagged_array [Array<Array<String>>] rows split into columns
    # @return [Array<Array<String>>] the same array, always (reject! on its own
    #   returns nil when no row was removed)
    def self.clean(jagged_array = [[]])
      jagged_array.reject! { |array| is_row_invalid? array }
      jagged_array
    end
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require 'date'
|
3
|
+
require 'open-uri'
|
4
|
+
require_relative 'statement_utils'
|
5
|
+
require_relative '../../pdf-unlocker.rb'
|
6
|
+
|
7
|
+
module NgBankParser
  # State and helper methods used to read a First Bank PDF statement.
  #
  # Working data lives in class variables of this module, so it is shared by
  # everything that uses the module. The state that belongs to one statement is
  # reset whenever account data or transaction data is (re)read.
  module FirstbankPdfHelpers
    include StatementUtils

    @@pdf_reader = nil
    @@raw_transactions = [[]]
    # Declared up front so reading them before a statement has been processed
    # yields nil instead of raising NameError.
    @@account_number = nil
    @@account_name = nil
    @@last_balance = nil
    @@statement_period = nil
    @@from_date = nil
    @@to_date = nil

    # Try to open the PDF and report whether it is encrypted.
    # On success the reader is kept for the other helpers.
    #
    # @param path [String] path to the PDF
    # @return [Boolean] true when PDF::Reader raises EncryptedPDFError
    def has_encryption? path
      @@pdf_reader = PDF::Reader.new(path)
      false
    rescue PDF::Reader::EncryptedPDFError
      true
    end

    # Unlock an encrypted PDF through PDFUnlocker and load the result.
    #
    # @param path [String] path to the encrypted PDF
    # @param password [String] password handed to PDFUnlocker
    # @return [Boolean] false when there is no response or the response
    #   contains 'Unlock Failed', true once the unlocked document is loaded
    def get_unlocked_pdf? path, password
      # The block form closes the file handle. This assumes PDFUnlocker is done
      # with the file once unlocked_pdf has returned -- TODO confirm against
      # pdf-unlocker.rb.
      response = File.open(path) do |file|
        PDFUnlocker.new(file, password).unlocked_pdf
      end
      return false unless response
      return false if response.include? 'Unlock Failed'

      pseudo_file = StringIO.new
      pseudo_file.write(response)
      @@pdf_reader = PDF::Reader.new(pseudo_file)
      true
    end

    # @return [Array<Array<String>>] rows collected by get_transaction_data
    def get_raw_transactions
      @@raw_transactions
    end

    # Collect the rows that follow the transaction table header on every page.
    # Rows from a previously read statement are discarded first.
    #
    # @return [Object] the page collection that was iterated
    def get_transaction_data
      # Same shape as the initial value: one empty row, which is_row_invalid?
      # flags for removal.
      @@raw_transactions = [[]]
      pages = get_pages @@pdf_reader
      pages.each do |page|
        page_text = get_page_text page
        index = get_transaction_table_index page_text
        unless index == -1
          add_to_transactions page_text[index..-1]
        end
      end
    end

    # Read account number, opening balance, account name and statement period
    # from the first page. Values from a previously read statement are cleared
    # first so they cannot be mistaken for this statement's data.
    #
    # @return [Array<Array<String>>] the first-page lines that were inspected
    def get_account_data
      @@account_number = nil
      @@account_name = nil
      @@last_balance = nil
      @@statement_period = nil
      @@from_date = nil
      @@to_date = nil

      lines = get_first_page_text @@pdf_reader
      lines.each do |line|
        # A whitespace-only line splits into an empty array; to_s keeps the
        # label checks from being called on nil.
        label = line[0].to_s
        if label.start_with? 'Account No:'
          set_account_number line
          set_last_balance line
        elsif label.start_with? 'Account Name:'
          set_account_name line
        elsif label.start_with? 'For the Period of:'
          set_statement_period line
        end
      end
    end

    # @return [String, nil] account number read from the statement
    def get_account_number
      @@account_number
    end

    # @return [String, nil] account name read from the statement
    def get_account_name
      @@account_name
    end

    # Balance used to classify the next transaction, truncated to an Integer.
    #
    # The value starts out as text taken from the statement and later becomes
    # the Float passed to update_last_balance. Thousands separators are removed
    # from text before converting, otherwise "12,345.67" would be read as 12.
    #
    # @return [Integer]
    def get_last_balance
      balance = @@last_balance
      balance = balance.delete(',').to_f if balance.is_a?(String)
      balance.to_i
    end

    # @return [Date] start of the statement period ("%d-%b-%Y")
    def get_from_date
      Date.strptime(@@from_date.strip, "%d-%b-%Y")
    end

    # @return [Date] end of the statement period ("%d-%b-%Y")
    def get_to_date
      Date.strptime(@@to_date.strip, "%d-%b-%Y")
    end

    # Re-read the transaction rows and report whether any were found.
    #
    # @return [Boolean] true when at least one non-empty row was collected
    def contains_transactions_table?
      get_transaction_data
      @@raw_transactions.any? { |row| !row.empty? }
    end

    # Re-read the account details and report whether all of them were found.
    #
    # @return [Object] truthy only when name, number, opening balance and
    #   statement period are all present
    def contains_account_data?
      get_account_data
      @@account_name && @@account_number && @@last_balance && @@statement_period
    end

    # @param line [Array<String>] split 'Account No:' line; index 1 is stored
    def set_account_number line
      @@account_number = line[1] unless blank_text?(line[1])
    end

    # @param line [Array<String>] split 'Account Name:' line; index 1 is stored
    def set_account_name line
      @@account_name = line[1] unless blank_text?(line[1])
    end

    # @param line [Array<String>] split 'Account No:' line; index 2 is stored
    #   as the opening balance
    def set_last_balance line
      # Guard on the column that is actually stored.
      @@last_balance = line[2] unless blank_text?(line[2])
    end

    # @param balance [Float] balance of the transaction just processed
    def update_last_balance balance
      @@last_balance = balance
    end

    # Store the statement period from a "<from> to <to>" value.
    # Nothing is stored unless both parts are present, so get_to_date is never
    # left without a value while contains_account_data? reports success.
    #
    # @param line [Array<String>] split 'For the Period of:' line
    def set_statement_period line
      return if blank_text?(line[1])

      period = line[1].split('to')
      return if period.length < 2

      @@statement_period = period
      @@from_date, @@to_date = @@statement_period
    end

    # @param row [Array<String>] split table row
    # @return [Integer, nil] match position when the first column contains a
    #   dd-Mon-yy date, nil otherwise
    def is_transaction_row? row
      row[0] =~ /(\d\d-[a-zA-Z]{3}-\d\d)/
    end

    # @param row [Array<String>] split table row
    # @return [Boolean] true for empty rows, the end-of-statement marker, the
    #   'Balance B/F' row and page footers
    def is_row_invalid? row
      row.length == 0 ||
        row[0].start_with?('END OF STATEMENT') ||
        row[0] == ('Balance B/F') ||
        row[0].start_with?('Page')
    end

    # @param msg [String] reason the statement could not be parsed
    # @return [Hash] { status: 0, message: msg }
    def error_message msg
      return {
        status: 0,
        message: msg
      }
    end

    # @param data [Hash] parsed statement data
    # @return [Hash] { status: 1, data: data }
    def send_response data
      return {
        status: 1,
        data: data
      }
    end

    private

    # Split each line on runs of two or more whitespace characters and append
    # the resulting row to the raw transactions.
    def add_to_transactions lines
      lines.each do |line|
        @@raw_transactions << line.strip.split(/\s\s+/)
      end
    end

    # True for nil and for text that is empty or whitespace only.
    # Core-Ruby replacement for `blank?`, which is not defined in this file.
    def blank_text? value
      value.to_s.strip.empty?
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module NgBankParser
  # Text helpers for statements read through a PDF reader object.
  #
  # A "reader" only needs to respond to #pages, and each page to #text.
  module StatementUtils

    # Lines of the first page, each split into columns.
    #
    # @param reader [#pages] PDF reader
    # @return [Array<Array<String>>] one array of columns per non-empty line;
    #   empty when the document has no pages
    def get_first_page_text reader
      first_page = reader.pages.first
      return [] unless first_page

      remove_empty_lines(first_page.text).lines.map do |line|
        split_on_2_or_more_spaces(line)
      end
    end

    # Non-empty lines of every page, in page order.
    #
    # @param reader [#pages] PDF reader
    # @return [Array<String>]
    def get_all_text reader
      reader.pages.flat_map { |page| remove_empty_lines(page.text).lines }
    end

    # Non-empty lines of a single page.
    #
    # @param page_text [#text] a page object (despite the parameter name)
    # @return [Array<String>]
    def get_page_text page_text
      remove_empty_lines(page_text.text).lines
    end

    # @param reader [#pages] PDF reader
    # @return [Object] whatever reader.pages returns
    def get_pages reader
      reader.pages
    end

    # Locate the first row after the transaction table header.
    #
    # @param lines [Array<String>] lines of one page
    # @return [Integer] index of the line following the one whose first column
    #   is 'TransDate', or -1 when the page has no such header
    def get_transaction_table_index lines
      header_index = lines.index do |line|
        split_on_2_or_more_spaces(line)[0] == 'TransDate'
      end
      header_index ? header_index + 1 : -1
    end

    private

    # Split a line into columns on runs of two or more whitespace characters.
    def split_on_2_or_more_spaces str
      str.strip.split(/\s\s+/)
    end

    # Drop completely empty lines from +text+.
    #
    # The public helpers used to call this as a method on the page text itself,
    # which only works if String has been extended somewhere else. Taking the
    # text as an argument makes the module self-contained; calling it without
    # an argument still operates on self, as before.
    def remove_empty_lines(text = self)
      text.gsub(/^$\n/, '')
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require_relative 'gtb-excel-parser/helpers'
|
2
|
+
|
3
|
+
module NgBankParser
  # Parser for Guaranty Trust Bank statements delivered as .xls / .xlsx files.
  # File reading and row extraction are provided by GtbExcelHelpers.
  class GtbExcel
    extend GtbExcelHelpers

    # Parse the statement at +path+.
    #
    # @param path [String] path to a file ending in ".xls" or ".xlsx"
    # @param password [String, nil] accepted but not used by this parser
    # @return [Hash] a send_response hash on success, an error_message hash
    #   when the extension or the detected content type is not supported
    def self.parse(path, password = nil)
      extension = File.extname(path)
      return error_message("Invalid file format") unless [".xls", ".xlsx"].include?(extension)

      file = read_file_contents(path)
      case file[:type]
      when "html" then html_parse(file[:contents])
      when "xls" then xls_parse(file[:contents])
      else error_message("Could not parse this file")
      end
    end

    # Build the response for a statement whose contents were read as HTML.
    #
    # @param file [Object] parsed document responding to #css
    # @return [Hash] send_response hash
    def self.html_parse(file)
      send_response({
        transactions: get_transactions_from_html(file),
        account_number: file.css("#lblAcctNo").text.return_first_number,
        from_date: file.css("#lblPeriod1").text.convert_string_to_date,
        to_date: file.css("#lblPeriod2").text.convert_string_to_date,
        account_name: file.css("#lblAcctName").text,
        bank_name: "Guaranty Trust Bank"
      })
    end

    # Build the response for a statement read as a spreadsheet.
    # Account number, period and account name are taken from the first cell of
    # rows 10, 14 and 5 respectively.
    #
    # @param file [Object] spreadsheet responding to #row and #each
    # @return [Hash] send_response hash
    def self.xls_parse(file)
      transactions = get_transactions_from_excel(file)
      account_number = file.row(10)[0].return_first_number
      period_start, period_end = file.row(14)[0].get_date_strings

      details = { transactions: transactions, account_number: account_number }
      details[:from_date] = period_start.convert_string_to_date
      details[:to_date] = period_end.convert_string_to_date
      details[:account_name] = file.row(5)[0]
      details[:bank_name] = "Guaranty Trust Bank"

      send_response(details)
    end

    # @param text [String] reason the file could not be parsed
    # @return [Hash] { status: 0, message: text }
    def self.error_message(text)
      { status: 0, message: text }
    end

    # @param data [Hash] parsed statement data
    # @return [Hash] { status: 1, data: data }
    def self.send_response(data)
      { status: 1, data: data }
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'roo'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
class Hash
  # Return a shallow copy of the hash with the given keys removed.
  # The receiver itself is left untouched.
  #
  # @param keys [Array<Object>] keys to leave out of the copy
  # @return [Hash]
  def without(*keys)
    dup.tap do |copy|
      keys.each { |key| copy.delete(key) }
    end
  end
end
|
12
|
+
|
13
|
+
class String
  # Join every numeric fragment (digits and dots) of the string and read the
  # result as a Float, e.g. "1,234.56" => 1234.56.
  # Because the pattern starts with a word boundary, a minus sign is only kept
  # when it directly follows a word character; a leading "-" is dropped.
  #
  # @return [Float]
  def convert_to_number
    numeric_fragments = to_s.scan(/\b-?[\d.]+/)
    numeric_fragments.join.to_f
  end

  # @return [String, nil] the first run of digits, or nil when there is none
  def return_first_number
    self[/\d+/]
  end

  # Parse the first date-like fragment of the string as "%d/%b/%Y".
  #
  # @return [Date]
  def convert_string_to_date
    first_date_text = self[/.....\d*..\d{4}/]
    Date.strptime(first_date_text, "%d/%b/%Y")
  end

  # @return [Array<String>] every date-like fragment found in the string
  def get_date_strings
    scan(/.....\d*..\d{4}/)
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
# File reading and row extraction for Guaranty Trust Bank statements.
#
# ".xls" downloads are read as HTML documents; every other accepted file is
# opened as a spreadsheet through Roo::Excelx.
module GtbExcelHelpers
  # Open the statement and report how its contents should be parsed.
  #
  # @param path [String] path to the statement file
  # @return [Hash] { type: "html", contents: document } for an HTML statement
  #   that contains the transactions table, { type: "unknown" } for a ".xls"
  #   file without that table, { type: "xls", contents: workbook } otherwise
  def read_file_contents(path)
    if File.extname(path) == '.xls'
      # File.open with a block closes the handle once the document is parsed
      # and, unlike Kernel#open, never treats the path as a command.
      contents = File.open(path) { |file| Nokogiri::HTML(file) }
      if has_transactions_table(contents)
        { type: "html", contents: contents }
      else
        { type: "unknown" }
      end
    else
      contents = Roo::Excelx.new(path)
      { type: "xls", contents: contents }
    end
  end

  # @param contents [#css] parsed HTML document
  # @return [Boolean] true when an element with id "dgtrans" exists.
  #   The node set returned by #css is truthy even when empty, so it has to be
  #   checked for emptiness.
  def has_transactions_table(contents)
    !contents.css("#dgtrans").empty?
  end

  # @param file [Object] parsed HTML document
  # @return [Array<Hash>] valid transactions found in the "dgtrans" table
  def get_transactions_from_html(file)
    extract_transaction_rows_from_html(file)
    convert_html_rows_to_transactions
  end

  # Store the rows of the "dgtrans" table in @rows, without the header row.
  #
  # @param file [#xpath] parsed HTML document
  def extract_transaction_rows_from_html(file)
    @rows = file.xpath('//table[@id="dgtrans"]/tr') # Get transaction table
    @rows.shift # Remove header row
  end

  # Convert the rows held in @rows into transaction hashes.
  #
  # Cells are read by position: 1 date, 2 reference, 4 debit, 5 credit,
  # 6 balance, 7 remarks. An empty cell is read as an empty string, and a date
  # that does not match "%d-%b-%Y" becomes nil, so such rows are removed by
  # filter_invalid instead of aborting the whole statement.
  #
  # @return [Array<Hash>] valid transactions only
  def convert_html_rows_to_transactions
    numeric_columns = [:debit, :credit, :balance]
    columns = [
      [:date, 'td[1]/text()'],
      [:ref, 'td[2]/text()'],
      [:debit, 'td[4]/text()'],
      [:credit, 'td[5]/text()'],
      [:balance, 'td[6]/text()'],
      [:remarks, 'td[7]/text()']
    ]

    transactions = @rows.map do |row|
      transaction = {}
      columns.each do |column_name, xpath|
        node = row.at_xpath(xpath)
        column_value = node ? node.text : ''

        if numeric_columns.include?(column_name)
          column_value = column_value.convert_to_number
        elsif column_name == :date
          column_value = parse_html_date(column_value)
        end

        transaction[column_name] = column_value
      end

      filter_debit_or_credit(transaction)
    end
    filter_invalid(transactions)
  end

  # @param transactions [Array<Hash>]
  # @return [Array<Hash>] the entries accepted by is_valid_transaction
  def filter_invalid(transactions)
    transactions.select { |row| is_valid_transaction(row) }
  end

  # Decide whether a row is a debit or a credit and set :type and :amount on
  # it. A missing or zero :debit makes the row a credit.
  #
  # @param transaction [Hash] row with :debit and :credit entries
  # @return [Hash] copy of the row without the :debit and :credit keys
  def filter_debit_or_credit(transaction)
    if transaction[:debit].nil? || transaction[:debit] == 0
      transaction[:type] = "credit"
      transaction[:amount] = transaction[:credit]
    else
      transaction[:type] = "debit"
      transaction[:amount] = transaction[:debit]
    end

    # Remove credit and debit keys
    transaction.without(:debit, :credit)
  end

  # Read the transactions of a spreadsheet statement.
  #
  # @param file [#each] sheet that yields one hash per row when given the
  #   column-title mapping (the Roo workbook built by read_file_contents)
  # @return [Array<Hash>] valid transactions, with :ref converted to a String
  def get_transactions_from_excel(file)
    transactions = []
    file.each(date: 'Trans Date', ref: 'Reference', debit: 'Debit', credit: 'Credit', balance: 'Balance', remarks: 'Remarks') do |row|
      if is_valid_transaction(row)
        transaction = filter_debit_or_credit(row)
        transaction[:ref] = transaction[:ref].to_s
        transactions << transaction
      end
    end
    transactions
  end

  # A row is a transaction when it has a Date, a Float amount (or credit, or
  # debit), a Float balance and non-empty remarks.
  #
  # @param row [Hash]
  # @return [Boolean]
  def is_valid_transaction(row)
    amount = row[:amount] || row[:credit] || row[:debit]
    # to_s: remarks can be nil for an empty cell.
    row[:date].is_a?(Date) && amount.is_a?(Float) && row[:balance].is_a?(Float) && !row[:remarks].to_s.empty?
  end

  private

  # Parse a "%d-%b-%Y" date, returning nil when the text does not match.
  def parse_html_date(text)
    Date.strptime(text, "%d-%b-%Y")
  rescue ArgumentError
    nil
  end
end
|