ng-bank-parser 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/.rspec +2 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +80 -0
- data/LICENSE +340 -0
- data/LICENSE.txt +21 -0
- data/README.md +92 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/ng-bank-parser.rb +11 -0
- data/lib/ng-bank-parser/banks.rb +24 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
- data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
- data/lib/ng-bank-parser/router.rb +44 -0
- data/lib/ng-bank-parser/version.rb +3 -0
- data/ng-bank-parser.gemspec +31 -0
- metadata +198 -0
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require_relative 'firstbank-pdf-parser/helpers'
|
3
|
+
|
4
|
+
|
5
|
+
module NgBankParser
  # Parser for First Bank PDF statements.
  #
  # The heavy lifting (PDF reading, account header extraction, row
  # classification, response envelopes) is provided by FirstbankPdfHelpers,
  # which is mixed in at class level. Parsed transactions are collected in
  # the class variable @@transactions, which is shared by every call.
  class FirstbankPdf
    extend FirstbankPdfHelpers

    @@transactions = []

    # Parses the statement at +path+.
    #
    # @param path [String] path to the statement; must end in ".pdf"
    # @param password [String, nil] password used when the PDF is encrypted
    # @return [Hash] the hash built by +send_response+ on success, or the
    #   hash built by +error_message+ when the file cannot be parsed
    def self.parse(path, password = nil)
      accepted_formats = ['.pdf']
      unless accepted_formats.include? File.extname(path)
        return error_message 'Invalid file format'
      end

      # @@transactions outlives a single call; without this reset a second
      # parse would return the previous statement's rows as well.
      @@transactions = []

      if has_encryption? path
        unless password
          return error_message 'PDF File is encrypted and no password was supplied'
        end
        unless get_unlocked_pdf? path, password
          return error_message 'Password supplied for decryption is invalid.'
        end
      end

      unless contains_account_data?
        return error_message 'Unable to read account details'
      end

      unless contains_transactions_table?
        return error_message 'Could not find any transactions'
      end

      extract_transactions(clean(get_raw_transactions))

      data = {}
      data[:bank_name] = 'First Bank'
      data[:account_number] = get_account_number
      data[:account_name] = get_account_name
      data[:from_date] = get_from_date
      data[:to_date] = get_to_date
      data[:transactions] = @@transactions
      send_response data
    end

    # NOTE: the original file placed a bare `private` here, which has no
    # effect on `def self.` methods, so the two methods below have always
    # been publicly callable. They are left public to stay compatible.

    # Converts cleaned statement rows into transaction hashes and appends
    # them to @@transactions.
    #
    # A row whose first cell looks like a date starts a new transaction;
    # any other row is treated as a continuation of the previous
    # transaction's remarks.
    #
    # @param jagged_array [Array<Array<String>>] rows of cells
    # @return [Array<Array<String>>] the rows that were passed in
    def self.extract_transactions(jagged_array = [[]])
      jagged_array.each do |array|
        if is_transaction_row? array
          transaction = {}
          transaction[:ref] = ''
          transaction[:date] = Date.strptime(array[0], '%d-%b-%y')
          transaction[:remarks] = array[1]
          transaction[:amount] = array[3].delete(',').to_f
          transaction[:balance] = array[4].delete(',').to_f
          # The statement has no debit/credit column: a balance above the
          # previously seen one is taken to be a credit, anything else a debit.
          transaction[:type] =
            transaction[:balance].to_i > get_last_balance ? 'credit' : 'debit'
          update_last_balance transaction[:balance]
          @@transactions << transaction
        elsif !@@transactions.empty?
          # Continuation line. An Array is always truthy, so emptiness has to
          # be checked explicitly; otherwise a stray line ahead of the first
          # transaction would call [] on nil.
          previous = @@transactions.last
          previous[:remarks] = "#{previous[:remarks]}#{array[0]}"
        end
      end
    end

    # Removes rows that +is_row_invalid?+ flags, in place.
    #
    # @param jagged_array [Array<Array<String>>] rows of cells (mutated)
    # @return [Array<Array<String>>] the same array, always; +reject!+ on its
    #   own returns nil when nothing was removed, which would break callers
    #   that chain on the result
    def self.clean(jagged_array = [[]])
      jagged_array.reject! { |array| is_row_invalid? array }
      jagged_array
    end
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require 'date'
|
3
|
+
require 'open-uri'
|
4
|
+
require_relative 'statement_utils'
|
5
|
+
require_relative '../../pdf-unlocker.rb'
|
6
|
+
|
7
|
+
module NgBankParser
  # Helper methods for the First Bank PDF parser.
  #
  # The module keeps its working state (the PDF reader, the raw table rows
  # and the account header fields) in class variables, so the state is shared
  # by everything that mixes the module in.
  module FirstbankPdfHelpers
    include StatementUtils

    @@pdf_reader = nil
    @@raw_transactions = [[]]
    # Initialised up front so reading them before a successful parse yields
    # nil instead of raising NameError (uninitialized class variable).
    @@account_number = nil
    @@account_name = nil
    @@last_balance = nil
    @@statement_period = nil
    @@from_date = nil
    @@to_date = nil

    # Tries to open the PDF at +path+ and remembers the reader on success.
    #
    # @param path [String]
    # @return [Boolean] true when PDF::Reader reports the file as encrypted
    def has_encryption? path
      @@pdf_reader = PDF::Reader.new(path)
      false
    rescue PDF::Reader::EncryptedPDFError
      true
    end

    # Unlocks the PDF through PDFUnlocker and, on success, points the shared
    # reader at the unlocked content.
    #
    # @param path [String]
    # @param password [String]
    # @return [Boolean] false when the unlocker returns nothing or a response
    #   containing 'Unlock Failed', true otherwise
    def get_unlocked_pdf? path, password
      # Block form so the file handle is closed once the unlocker is done.
      response = File.open(path) do |file|
        PDFUnlocker.new(file, password).unlocked_pdf
      end
      return false unless response
      return false if response.include? 'Unlock Failed'

      pseudo_file = StringIO.new
      pseudo_file.write(response)
      @@pdf_reader = PDF::Reader.new(pseudo_file)
      true
    end

    # @return [Array<Array<String>>] rows collected by get_transaction_data
    def get_raw_transactions
      @@raw_transactions
    end

    # Collects, from every page, the lines that follow the transaction table
    # header and stores them (split into cells) in @@raw_transactions.
    def get_transaction_data
      # Drop rows left over from a previous statement.
      @@raw_transactions = []
      pages = get_pages @@pdf_reader
      pages.each do |page|
        page_text = get_page_text page
        index = get_transaction_table_index page_text
        add_to_transactions page_text[index..-1] unless index == -1
      end
    end

    # Reads account number, opening balance, account name and statement
    # period from the first page.
    def get_account_data
      # Forget the previous statement's header so a file lacking one of the
      # fields is reported as unreadable rather than inheriting stale values.
      @@account_number = @@account_name = @@last_balance = nil
      @@statement_period = @@from_date = @@to_date = nil

      lines = get_first_page_text @@pdf_reader
      lines.each do |line|
        # A whitespace-only line splits into an empty array, hence to_s.
        label = line[0].to_s
        if label.start_with? 'Account No:'
          set_account_number line
          set_last_balance line
        elsif label.start_with? 'Account Name:'
          set_account_name line
        elsif label.start_with? 'For the Period of:'
          set_statement_period line
        end
      end
    end

    def get_account_number
      @@account_number
    end

    def get_account_name
      @@account_name
    end

    # @return [Integer] the last seen balance, truncated to a whole number
    def get_last_balance
      balance = @@last_balance
      # The opening balance is stored as the raw statement text; without
      # removing thousands separators "1,234.56".to_i would yield 1.
      balance = balance.delete(',') if balance.is_a?(String)
      balance.to_i
    end

    # @return [Date] start of the statement period
    def get_from_date
      Date.strptime(@@from_date.strip, "%d-%b-%Y")
    end

    # @return [Date] end of the statement period
    def get_to_date
      Date.strptime(@@to_date.strip, "%d-%b-%Y")
    end

    # Loads the raw rows and reports whether any of them is a dated
    # transaction row.
    #
    # @return [Boolean]
    def contains_transactions_table?
      get_transaction_data
      # Returning the array itself is always truthy, even when it is empty.
      @@raw_transactions.any? { |row| is_transaction_row?(row) }
    end

    # Loads the account header and reports whether every field was found.
    #
    # @return [Object] truthy only when name, number, balance and period are set
    def contains_account_data?
      get_account_data
      @@account_name && @@account_number && @@last_balance && @@statement_period
    end

    def set_account_number line
      @@account_number = line[1] unless blank_value?(line[1])
    end

    def set_account_name line
      @@account_name = line[1] unless blank_value?(line[1])
    end

    def set_last_balance line
      # Guard on the cell that is actually stored (index 2).
      @@last_balance = line[2] unless blank_value?(line[2])
    end

    def update_last_balance balance
      @@last_balance = balance
    end

    # Splits the period cell on 'to' into the from/to date strings.
    def set_statement_period line
      return if blank_value?(line[1])

      @@statement_period = line[1].split('to')
      @@from_date, @@to_date = @@statement_period
    end

    # @param row [Array<String>]
    # @return [Integer, nil] truthy when the first cell starts with a
    #   dd-Mon-yy date
    def is_transaction_row? row
      # Anchored so a continuation line that merely mentions a date is not
      # mistaken for a new transaction.
      row[0].to_s =~ /\A(\d\d-[a-zA-Z]{3}-\d\d)/
    end

    # @param row [Array<String>]
    # @return [Boolean] true for empty rows, page footers, the
    #   'Balance B/F' row and the end-of-statement marker
    def is_row_invalid? row
      row.length == 0 ||
        row[0].start_with?('END OF STATEMENT') ||
        row[0] == ('Balance B/F') ||
        row[0].start_with?('Page')
    end

    # @param msg [String]
    # @return [Hash] failure envelope
    def error_message msg
      {
        status: 0,
        message: msg
      }
    end

    # @param data [Hash]
    # @return [Hash] success envelope
    def send_response data
      {
        status: 1,
        data: data
      }
    end

    private

    # Splits each line on runs of two or more whitespace characters and
    # appends the resulting cells to @@raw_transactions.
    def add_to_transactions lines
      lines.each do |line|
        @@raw_transactions << line.strip.split(/\s\s+/)
      end
    end

    # Standard-library stand-in for ActiveSupport's `blank?`, so the helpers
    # do not depend on ActiveSupport having been loaded.
    def blank_value? value
      value.nil? || value.to_s.strip.empty?
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module NgBankParser
  # Text-extraction helpers shared by the PDF statement parsers.
  #
  # Every method takes the reader/page object it works on; the only things
  # required of those objects are +pages+ (on a reader) and +text+ (on a page).
  module StatementUtils
    # @param reader [#pages] PDF reader
    # @return [Array<Array<String>>] non-empty lines of the first page, each
    #   split into cells on runs of two or more whitespace characters
    def get_first_page_text reader
      lines = remove_empty_lines(reader.pages.first.text).lines
      lines.map { |line| split_on_2_or_more_spaces(line) }
    end

    # @param reader [#pages] PDF reader
    # @return [Array<String>] non-empty lines of every page, in page order
    def get_all_text reader
      reader.pages.flat_map { |page| remove_empty_lines(page.text).lines }
    end

    # @param page_text [#text] a single page
    # @return [Array<String>] non-empty lines of that page
    def get_page_text page_text
      remove_empty_lines(page_text.text).lines
    end

    # @param reader [#pages] PDF reader
    # @return [Enumerable] the reader's pages
    def get_pages reader
      reader.pages
    end

    # Finds the line that follows the transaction table header.
    #
    # @param lines [Array<String>] lines of one page
    # @return [Integer] index of the first line after the 'TransDate' header,
    #   or -1 when the page has no such header
    def get_transaction_table_index lines
      header_index = lines.index do |line|
        split_on_2_or_more_spaces(line)[0] == 'TransDate'
      end
      header_index ? header_index + 1 : -1
    end

    private

    def split_on_2_or_more_spaces str
      str.strip.split(/\s\s+/)
    end

    # Strips empty lines from +text+.
    #
    # The text is passed in explicitly: the public methods above used to call
    # this as a String instance method, which only worked when another file
    # had patched String. The +self+ default keeps the zero-argument form
    # callable.
    def remove_empty_lines(text = self)
      text.gsub(/^$\n/, '')
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
require_relative 'gtb-excel-parser/helpers'
|
2
|
+
|
3
|
+
module NgBankParser
  # Parser for Guaranty Trust Bank statements exported as ".xls"/".xlsx".
  #
  # File loading and row extraction come from GtbExcelHelpers; this class
  # picks the right strategy for the loaded file and assembles the response.
  class GtbExcel
    extend GtbExcelHelpers

    class << self
      # Parses the statement at +path+.
      #
      # @param path [String] path ending in ".xls" or ".xlsx"
      # @param password [String, nil] accepted for interface parity; not used here
      # @return [Hash] success envelope with the statement data, or a
      #   failure envelope with a message
      def parse(path, password = nil)
        extension = File.extname(path)
        return error_message("Invalid file format") unless [".xls", ".xlsx"].include?(extension)

        file = read_file_contents(path)
        case file[:type]
        when "html" then html_parse(file[:contents])
        when "xls" then xls_parse(file[:contents])
        else error_message("Could not parse this file")
        end
      end

      # Builds the response from a statement loaded as an HTML document.
      #
      # @param file [Object] document answering +css+
      # @return [Hash] success envelope
      def html_parse(file)
        transactions = get_transactions_from_html(file)
        account_number = file.css("#lblAcctNo").text().return_first_number
        from_date = file.css("#lblPeriod1").text().convert_string_to_date
        to_date = file.css("#lblPeriod2").text().convert_string_to_date
        account_name = file.css("#lblAcctName").text()

        send_response(
          transactions: transactions,
          account_number: account_number,
          from_date: from_date,
          to_date: to_date,
          account_name: account_name,
          bank_name: "Guaranty Trust Bank"
        )
      end

      # Builds the response from a statement loaded as a spreadsheet.
      # Header fields are read from fixed rows (5, 10 and 14) of the sheet.
      #
      # @param file [Object] spreadsheet answering +row+ and +each+
      # @return [Hash] success envelope
      def xls_parse(file)
        transactions = get_transactions_from_excel(file)
        account_number = file.row(10)[0].return_first_number
        period_start, period_end = file.row(14)[0].get_date_strings.values_at(0, 1)
        from_date = period_start.convert_string_to_date
        to_date = period_end.convert_string_to_date
        account_name = file.row(5)[0]

        send_response(
          transactions: transactions,
          account_number: account_number,
          from_date: from_date,
          to_date: to_date,
          account_name: account_name,
          bank_name: "Guaranty Trust Bank"
        )
      end

      # @param text [String] reason for the failure
      # @return [Hash] failure envelope
      def error_message(text)
        { status: 0, message: text }
      end

      # @param data [Hash] parsed statement
      # @return [Hash] success envelope
      def send_response(data)
        { status: 1, data: data }
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'roo'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
class Hash
  # Returns a shallow copy of the receiver with the given keys removed.
  # The receiver itself is left untouched; keys that are not present are
  # ignored.
  #
  # @param keys [Array<Object>] keys to drop
  # @return [Hash] the trimmed copy
  def without(*keys)
    keys.each_with_object(dup) { |key, copy| copy.delete(key) }
  end
end
|
12
|
+
|
13
|
+
class String
  # Extracts a number from formatted text such as "1,234.56".
  #
  # Digit groups are joined, so thousands separators disappear. A minus
  # sign at the start of the text is honoured; the scan on its own loses it
  # because `\b` cannot match in front of a leading "-".
  #
  # @return [Float] the parsed value, 0.0 when the text holds no digits
  def convert_to_number
    magnitude = scan(/\b-?[\d.]+/).join.to_f
    lstrip.start_with?('-') ? -magnitude.abs : magnitude
  end

  # @return [String, nil] the first run of digits in the text, if any
  def return_first_number
    self[/\d+/]
  end

  # Parses the first date-like substring as day/abbreviated-month/year.
  #
  # @return [Date]
  # @raise [TypeError] when the text contains no date-like substring
  # @raise [ArgumentError] when the substring is not a valid date
  def convert_string_to_date
    Date.strptime(get_date_strings[0], "%d/%b/%Y")
  end

  # @return [Array<String>] every substring shaped like "01/Jan/2015"
  #   (any five characters, optional digits, any two characters, four digits)
  def get_date_strings
    scan(/.....\d*..\d{4}/)
  end
end
|
28
|
+
|
29
|
+
|
30
|
+
# Loading and row-extraction helpers for Guaranty Trust Bank statements,
# which arrive either as HTML saved with an ".xls" extension or as a real
# spreadsheet.
module GtbExcelHelpers
  # Transaction hash key => XPath of the table cell (relative to a row).
  HTML_COLUMNS = [
    [:date, 'td[1]/text()'],
    [:ref, 'td[2]/text()'],
    [:debit, 'td[4]/text()'],
    [:credit, 'td[5]/text()'],
    [:balance, 'td[6]/text()'],
    [:remarks, 'td[7]/text()']
  ].freeze

  # Columns whose text is converted to a number.
  NUMERIC_COLUMNS = [:debit, :credit, :balance].freeze

  # Loads the statement at +path+.
  #
  # @param path [String]
  # @return [Hash] +{type: "html", contents: doc}+ for an ".xls" file holding
  #   the transactions table, +{type: "unknown"}+ for an ".xls" file without
  #   it, +{type: "xls", contents: sheet}+ for anything else
  def read_file_contents(path)
    if File.extname(path) == '.xls'
      # File.open with a block closes the handle and, unlike Kernel#open,
      # never treats a path starting with "|" as a command to run.
      contents = File.open(path) { |io| Nokogiri::HTML(io) }
      if has_transactions_table(contents)
        { type: "html", contents: contents }
      else
        { type: "unknown" }
      end
    else
      contents = Roo::Excelx.new(path)
      { type: "xls", contents: contents }
    end
  end

  # @param contents [#css] parsed document
  # @return [Boolean] whether the document has an element with id "dgtrans"
  def has_transactions_table(contents)
    # The result set is truthy even when empty, so test emptiness explicitly.
    !contents.css("#dgtrans").empty?
  end

  # @param file [Object] parsed HTML document
  # @return [Array<Hash>] valid transactions found in the document
  def get_transactions_from_html(file)
    extract_transaction_rows_from_html(file)
    convert_html_rows_to_transactions
  end

  # Stores the data rows of the "dgtrans" table in @rows.
  def extract_transaction_rows_from_html(file)
    @rows = file.xpath('//table[@id="dgtrans"]/tr') # Get transaction table
    @rows.shift # Remove header row
  end

  # Turns the rows held in @rows into transaction hashes and keeps the
  # valid ones.
  #
  # @return [Array<Hash>]
  def convert_html_rows_to_transactions
    transactions = @rows.map do |row|
      transaction = {}
      HTML_COLUMNS.each do |column_name, xpath|
        # An empty cell has no text node; treat it as an empty string.
        node = row.at_xpath(xpath)
        column_value = node ? node.text : ''

        if NUMERIC_COLUMNS.include?(column_name)
          column_value = column_value.convert_to_number
        end

        transaction[column_name] = column_value
        transaction[:date] = Date.strptime(column_value, "%d-%b-%Y") if column_name == :date
      end

      filter_debit_or_credit(transaction)
    end
    filter_invalid(transactions)
  end

  # @param transactions [Array<Hash>]
  # @return [Array<Hash>] rows accepted by +is_valid_transaction+
  def filter_invalid(transactions)
    transactions.select { |row| is_valid_transaction(row) }
  end

  # Collapses the separate debit/credit columns into :type and :amount.
  # The row counts as a credit when its debit is missing or zero.
  #
  # @param transaction [Hash] row with :debit and :credit (gains :type and
  #   :amount as a side effect)
  # @return [Hash] copy of the row without the :debit and :credit keys
  def filter_debit_or_credit(transaction)
    if transaction[:debit].nil? || transaction[:debit] == 0
      transaction[:type] = "credit"
      transaction[:amount] = transaction[:credit]
    else
      transaction[:type] = "debit"
      transaction[:amount] = transaction[:debit]
    end

    # Remove credit and debit keys
    transaction.without(:debit, :credit)
  end

  # @param file [Object] spreadsheet whose +each+ yields rows keyed by the
  #   given header mapping
  # @return [Array<Hash>] valid transactions found in the sheet
  def get_transactions_from_excel(file)
    transactions = []
    file.each(date: 'Trans Date', ref: 'Reference', debit: 'Debit', credit: 'Credit', balance: 'Balance', remarks: 'Remarks') do |row|
      next unless is_valid_transaction(row)

      transaction = filter_debit_or_credit(row)
      transaction[:ref] = transaction[:ref].to_s
      transactions << transaction
    end
    transactions
  end

  # A row is a transaction when it has a Date, a numeric amount (or
  # credit/debit), a numeric balance and non-empty remarks.
  #
  # Any Numeric is accepted so whole-number cells delivered as Integer are
  # not discarded, and missing remarks count as empty instead of raising.
  #
  # @param row [Hash]
  # @return [Boolean]
  def is_valid_transaction(row)
    amount = row[:amount] || row[:credit] || row[:debit]
    row[:date].is_a?(Date) &&
      amount.is_a?(Numeric) &&
      row[:balance].is_a?(Numeric) &&
      !row[:remarks].to_s.empty?
  end
end
|