ng-bank-parser 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/.rspec +2 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +80 -0
- data/LICENSE +340 -0
- data/LICENSE.txt +21 -0
- data/README.md +92 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/ng-bank-parser.rb +11 -0
- data/lib/ng-bank-parser/banks.rb +24 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
- data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
- data/lib/ng-bank-parser/router.rb +44 -0
- data/lib/ng-bank-parser/version.rb +3 -0
- data/ng-bank-parser.gemspec +31 -0
- metadata +198 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require 'open-uri'
|
3
|
+
require_relative 'uba-pdf-parser/transaction_tools'
|
4
|
+
|
5
|
+
|
6
|
+
module NgBankParser
  # Parser for United Bank for Africa PDF account statements.
  #
  # The public entry point is UbaPdf.parse. All parsing state (@reader,
  # @first_page_text, the account details, ...) is stored in instance
  # variables of the class object itself, so every call overwrites the state
  # left by the previous one.
  class UbaPdf
    extend TransactionTools

    class << self
      # Reads the statement at +url+ and extracts account details and
      # transactions from it.
      #
      # @param url [String] location handed to open-uri; its extension must be
      #   listed in ACCEPTED_FORMATS.
      # @param password [String, nil] accepted for interface compatibility;
      #   this method does not use it.
      # @return [Hash] +{ status: 1, data: {...} }+ on success, or
      #   +{ status: 0, message: INVALID_FILE_STRING }+ when the extension or
      #   the first-page layout checks fail.
      def parse(url, password = nil)
        return invalid_file unless ACCEPTED_FORMATS.include?(File.extname(url))

        file = open_statement(url)
        @reader = PDF::Reader.new(file)
        set_up_first_page
        return invalid_file unless is_valid_pdf?

        set_account_details
        set_transactions
        return_payload
      ensure
        # The payload above is built from plain strings, dates and hashes, so
        # the handle is not needed once we get here.
        file.close if file.respond_to?(:close) && !file.closed?
      end

      private

      # Opens +url+ through open-uri. URI.open is preferred because
      # Kernel#open stopped handling URLs in Ruby 3.0; older Rubies that lack
      # a public URI.open fall back to Kernel#open.
      # NOTE(review): for strings that are not URLs both forms end up in
      # Kernel#open, so +url+ should come from a trusted source - confirm.
      def open_statement(url)
        URI.respond_to?(:open) ? URI.open(url) : open(url)
      end

      # Caches the first page's text (empty lines removed) and derives the
      # column positions from its table-header line.
      def set_up_first_page
        @first_page_text = @reader.pages.first.text.remove_empty_lines
        set_column_positions(@first_page_text.lines[TABLE_HEADER_LINE_INDEX])
      end

      # Fills in every account-level field of the payload.
      def set_account_details
        set_bank_name
        set_account_name
        set_account_number
        set_dates
        # print_account_details
      end

      # Debug helper: prints the extracted account details to stdout.
      def print_account_details
        puts @bank_name
        puts @account_name
        puts @account_number
        puts @from_date
        puts @to_date
      end

      def set_bank_name
        @bank_name = "United Bank for Africa"
      end

      # The account name is the text between ACCOUNT_NAME_START_MARKER and
      # ACCOUNT_NAME_END_MARKER on the account-name line.
      def set_account_name
        account_name_line = @first_page_text.lines[ACCOUNT_NAME_LINE_INDEX]
        @account_name = account_name_line.get_text_between_markers(ACCOUNT_NAME_START_MARKER, ACCOUNT_NAME_END_MARKER)
      end

      # Extracts the text between ACCOUNT_NUMBER_STRING and CURRENCY_STRING.
      # Uses the non-bang downcase/upcase: the bang variants return nil when
      # the string is already in the requested case (e.g. an all-digit
      # account number), which used to turn the result into nil.
      def set_account_number
        account_number_line = @first_page_text.lines[ACCOUNT_NUMBER_LINE_INDEX]
        normalized_line = account_number_line.remove_white_spaces.downcase
        @account_number = normalized_line.get_text_between_markers(ACCOUNT_NUMBER_STRING, CURRENCY_STRING).upcase
      end

      # Reads the statement period from the second line of the first page.
      def set_dates
        normalized_line = @first_page_text.lines[1].remove_white_spaces.downcase
        @from_date = normalized_line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING).upcase.convert_to_date
        @to_date = normalized_line.get_text_after_marker(END_DATE_STRING).upcase.convert_to_date
      end

      # Success payload returned by parse.
      def return_payload
        {
          status: 1,
          data: {
            bank_name: @bank_name,
            account_number: @account_number,
            account_name: @account_name,
            from_date: @from_date,
            to_date: @to_date,
            transactions: @transactions_hashes_array
          }
        }
      end

      # Failure payload returned by parse.
      def invalid_file
        { status: 0, message: INVALID_FILE_STRING }
      end
    end
  end
end
|
97
|
+
|
98
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# Core-class extensions used by the UBA PDF parser to slice and normalise
# lines of statement text. None of these methods mutate the receiver.
class String
  # Returns the slice described by +column+, a hash with :start and :end
  # indices (inclusive). Returns nil when :start lies beyond the string.
  def extract_column(column)
    self[column[:start]..column[:end]]
  end

  # Drops lines that contain nothing but their newline.
  def remove_empty_lines
    gsub(/^$\n/, '')
  end

  # Joins all lines by removing every newline character.
  # (The pattern must be double-quoted: '\n' in single quotes is a literal
  # backslash followed by "n" and never matched a newline.)
  def remove_multiple_lines
    gsub("\n", '')
  end

  # Removes every run of whitespace.
  def remove_white_spaces
    gsub(/\s+/, "")
  end

  # Collapses every run of whitespace into a single space.
  def reduce_to_singular_white_space
    gsub(/\s+/, " ")
  end

  # Removes all commas (used for thousands separators in amounts).
  def remove_commas
    delete(',')
  end

  # Returns the shortest text found between +marker1+ and +marker2+
  # (newlines included), or nil when the pair does not occur.
  def get_text_between_markers(marker1, marker2)
    self[/#{Regexp.escape(marker1)}(.*?)#{Regexp.escape(marker2)}/m, 1]
  end

  # Returns everything after the first occurrence of +marker+, or an empty
  # string when +marker+ is absent.
  def get_text_after_marker(marker)
    partition(marker).last
  end

  # Returns the first ten characters, where a transaction line carries its
  # date.
  def get_date_string
    self[0...10]
  end

  # Returns the first line (including its newline), or nil for an empty
  # string.
  def get_first_line
    lines[0]
  end

  # True when Date.parse accepts the string.
  def is_date?
    Date.parse(self)
    true
  rescue ArgumentError
    false
  end

  # Parses the string as day-abbreviated month-year, e.g. "01-JAN-2015".
  # Raises ArgumentError when the string does not match.
  def convert_to_date
    Date.strptime(self, '%d-%b-%Y')
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'active_support/json/decoding.rb'
|
2
|
+
require 'active_support/json/encoding.rb'
|
3
|
+
|
4
|
+
|
5
|
+
# One statement row, built from the text of a transaction (its first line
# plus any continuation lines) and the column positions of the statement
# table. Relies on the String helpers defined by the parser
# (get_first_line, extract_column, ...) and on String#blank?.
class Transaction
  TYPE = {
    debit: 'debit',
    credit: 'credit'
  }.freeze

  # @param string [String] text of the transaction; the first line carries
  #   the date, amount and balance, every line may carry remarks.
  # @param columns [Hash] column descriptors keyed by :debit_column,
  #   :credit_column, :balance_column and :remarks_column, each a hash with
  #   :start and :end indices.
  def initialize(string, columns)
    set_date_from_transaction_string(string)
    set_amount_and_type_from_transaction_string(string, columns[:debit_column], columns[:credit_column])
    set_balance_from_transaction_string(string, columns[:balance_column])
    set_remarks_from_transaction_string(string, columns[:remarks_column])
    set_ref
  end

  # @return [Hash] the transaction as plain values.
  def to_hash
    {
      date: @date,
      amount: @amount,
      type: @type,
      balance: @balance,
      remarks: @remarks,
      ref: @ref
    }
  end

  private

  # The date is the first ten characters of the first line, day-month-year.
  def set_date_from_transaction_string(string)
    @date = Date.strptime(string.get_first_line.get_date_string, '%d-%m-%Y')
  end

  # Exactly one of the debit/credit columns is expected to be filled on the
  # first line. @type and @amount stay nil when both are blank or both are
  # filled.
  def set_amount_and_type_from_transaction_string(string, debit_column, credit_column)
    first_line = string.get_first_line
    debit = first_line.extract_column(debit_column).remove_white_spaces
    credit = first_line.extract_column(credit_column).remove_white_spaces

    if debit.blank? && !credit.blank?
      @type = TYPE[:credit]
      @amount = credit.remove_commas.to_f
    elsif credit.blank? && !debit.blank?
      @type = TYPE[:debit]
      @amount = debit.remove_commas.to_f
    end
  end

  def set_balance_from_transaction_string(string, balance_column)
    @balance = string.get_first_line.extract_column(balance_column).remove_white_spaces.remove_commas.to_f
  end

  # Concatenates the remarks column of every line, then collapses runs of
  # whitespace. extract_column returns nil for a line that ends before the
  # remarks column starts; such a line contributes nothing instead of
  # raising.
  def set_remarks_from_transaction_string(string, remarks_column)
    pieces = string.lines.map { |line| line.extract_column(remarks_column).to_s }
    @remarks = pieces.join.reduce_to_singular_white_space
  end

  def set_ref
    @ref = '0'
  end
end
|
73
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module NgBankParser
  # Layout constants for UBA PDF statements: line indices on the first page,
  # marker strings (compared against whitespace-stripped, lower-cased text)
  # and the lower-cased table-header labels. Arrays and strings are frozen so
  # that shared constants cannot be mutated by accident.
  module Constants
    ACCEPTED_FORMATS = ['.pdf'].freeze

    # Line indices within the first page's text (empty lines removed).
    TABLE_HEADER_LINE_INDEX = 6
    LENGTH_OF_TRANSACTION_DATE_STRING = 10
    ACCOUNT_NAME_LINE_INDEX = 2
    ACCOUNT_NUMBER_LINE_INDEX = 5
    RANGE_OF_LINES_WITH_TRANSACTIONS = 7..-3

    # Markers looked up in header lines.
    FIRST_LINE_CHECKER_STRING = 'statementoftransactions'.freeze
    START_DATE_STRING = 'startdate:'.freeze
    END_DATE_STRING = 'enddate:'.freeze
    ACCOUNT_NUMBER_STRING = 'accountnumber:'.freeze
    CURRENCY_STRING = 'currency'.freeze
    ACCOUNT_NAME_START_MARKER = '( '.freeze
    ACCOUNT_NAME_END_MARKER = ' )'.freeze

    # Table-header labels.
    DATE_COLUMN_HEADER_STRING = 'date'.freeze
    REMARKS_COLUMN_HEADER_STRING = 'description'.freeze
    CHEQUE_NUMBER_COLUMN_HEADER_STRING = 'chq. no'.freeze
    VAL_DATE_COLUMN_HEADER_STRING = 'val date'.freeze
    DEBIT_COLUMN_HEADER_STRING = 'debit'.freeze
    CREDIT_COLUMN_HEADER_STRING = 'credit'.freeze
    BALANCE_COLUMN_HEADER_STRING = 'balance'.freeze

    # Applied to the date and remarks header cells before comparing them
    # with their labels; it drops the cell's last character.
    COLUMN_OFFSET_RANGE = 0..-2
    INVALID_FILE_STRING = 'Invalid File'.freeze
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative 'constants'
|
2
|
+
|
3
|
+
module NgBankParser
  # Layout checks that decide whether the first page of a PDF looks like a
  # UBA statement. The object mixing this in must have set @first_page_text
  # and the @date_column / @remarks_column / @debit_column / @credit_column /
  # @balance_column hashes (:start / :end) before calling is_valid_pdf?.
  #
  # Every check answers true or false; a missing line, marker or column makes
  # the check fail instead of raising.
  module PdfChecks
    include Constants

    private

    # True only when every individual layout check passes.
    def is_valid_pdf?
      is_first_line_of_first_page_correct? &&
        is_date_range_line_correct? &&
        is_account_number_line_correct? &&
        are_columns_correct?
    end

    # The first line must end with FIRST_LINE_CHECKER_STRING.
    def is_first_line_of_first_page_correct?
      normalized_first_page_line(0).end_with?(FIRST_LINE_CHECKER_STRING)
    end

    # The second line must carry a parseable start date (between the start
    # and end markers) and a parseable end date (after the end marker - the
    # same text that is later read as the statement's end date).
    def is_date_range_line_correct?
      date_line = normalized_first_page_line(1)
      from_text = date_line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING)
      return false if from_text.nil?

      to_text = date_line.get_text_after_marker(END_DATE_STRING)
      from_text.upcase.is_date? && to_text.upcase.is_date?
    end

    # The account-number line must start with ACCOUNT_NUMBER_STRING.
    def is_account_number_line_correct?
      normalized_first_page_line(ACCOUNT_NUMBER_LINE_INDEX).start_with?(ACCOUNT_NUMBER_STRING)
    end

    # Every checked table-header cell must hold its expected label.
    def are_columns_correct?
      is_date_column_correct? &&
        is_remarks_column_correct? &&
        is_debit_column_correct? &&
        is_credit_column_correct? &&
        is_balance_column_correct?
    end

    def is_date_column_correct?
      header_cell(@date_column)[COLUMN_OFFSET_RANGE] == DATE_COLUMN_HEADER_STRING
    end

    def is_remarks_column_correct?
      header_cell(@remarks_column)[COLUMN_OFFSET_RANGE] == REMARKS_COLUMN_HEADER_STRING
    end

    def is_debit_column_correct?
      header_cell(@debit_column) == DEBIT_COLUMN_HEADER_STRING
    end

    def is_credit_column_correct?
      header_cell(@credit_column) == CREDIT_COLUMN_HEADER_STRING
    end

    def is_balance_column_correct?
      header_cell(@balance_column) == BALANCE_COLUMN_HEADER_STRING
    end

    # Line +index+ of the first page with whitespace removed and lower-cased;
    # an empty string when the page has no such line. Uses the non-bang
    # downcase because downcase! returns nil for text that is already
    # lower-case.
    def normalized_first_page_line(index)
      @first_page_text.lines[index].to_s.remove_white_spaces.downcase
    end

    # Whitespace-free, lower-cased content of the table-header line inside
    # +column+; an empty string when the column or the header line is
    # unavailable.
    def header_cell(column)
      return '' unless column && column[:start] && column[:end]

      header_line = @first_page_text.lines[TABLE_HEADER_LINE_INDEX].to_s
      header_line.extract_column(column).to_s.remove_white_spaces.downcase
    end
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'active_support/core_ext/object/blank'
|
3
|
+
require_relative 'classes/string'
|
4
|
+
require_relative 'classes/transaction'
|
5
|
+
require_relative 'pdf_checks'
|
6
|
+
|
7
|
+
|
8
|
+
module NgBankParser
  # Turns the text of a statement's pages into Transaction objects and works
  # out where each table column sits on a line. Expects the extending object
  # to provide @reader (with #pages) and an invalid_file method.
  module TransactionTools
    include PdfChecks

    private

    # Builds @transactions and @transactions_hashes_array from @reader.
    #
    # Both collections are reset on every call: the state lives on the object
    # that extends this module, so reusing it for a second statement must not
    # carry over the previous statement's rows.
    def set_transactions
      @transaction_strings = []
      # NOTE(review): the last page is skipped on purpose by the original
      # code - presumably it holds no transaction rows; confirm.
      @reader.pages[0..-2].each do |page|
        seperate_lines_into_transaction_strings(page.text.remove_empty_lines.lines)
      end
      @transactions = @transaction_strings.map do |transaction_string|
        Transaction.new(transaction_string, @columns)
      end
      # NOTE(review): the first parsed row is discarded - presumably an
      # opening-balance row; confirm.
      @transactions.shift
      set_transactions_hashes_array
    end

    # Mirrors @transactions as an array of plain hashes.
    def set_transactions_hashes_array
      @transactions_hashes_array = @transactions.map(&:to_hash)
    end

    # Groups the lines of one page into transaction strings: every line that
    # starts with a date opens a transaction, and the following date-less
    # lines are appended to it. Lines ahead of the first dated line are
    # ignored. Results are appended to @transaction_strings.
    #
    # @param lines [Array<String>] all lines of a page; only
    #   RANGE_OF_LINES_WITH_TRANSACTIONS is considered.
    def seperate_lines_into_transaction_strings(lines)
      @transaction_strings ||= []
      # Slicing yields nil when the page is shorter than the range's start.
      @lines = lines[RANGE_OF_LINES_WITH_TRANSACTIONS] || []
      @lines.slice_before { |line| line_has_date?(line) }.each do |group|
        next unless line_has_date?(group.first)

        @transaction_strings << group.join
      end
    end

    # True when the first ten characters of +line+ parse as a date.
    def line_has_date?(line)
      line.get_date_string.is_date?
    end

    # Lines of the current page that follow position +index+.
    def get_next_lines(index)
      @lines[index+1..-1]
    end

    # Derives the :start/:end indices of each table column from the header
    # line and stores them in @date_column ... @balance_column and @columns.
    # The argument is not modified.
    #
    # A header label that cannot be found leaves nil in the affected
    # positions, and the method then returns invalid_file; otherwise it
    # returns @columns.
    def set_column_positions(column_header_line)
      header = column_header_line.to_s.downcase

      date_at = header.index(DATE_COLUMN_HEADER_STRING)
      remarks_at = header.index(REMARKS_COLUMN_HEADER_STRING)
      cheque_at = header.index(CHEQUE_NUMBER_COLUMN_HEADER_STRING)
      val_date_at = header.index(VAL_DATE_COLUMN_HEADER_STRING)
      debit_at = header.index(DEBIT_COLUMN_HEADER_STRING)
      credit_at = header.index(CREDIT_COLUMN_HEADER_STRING)
      balance_at = header.index(BALANCE_COLUMN_HEADER_STRING)

      val_date_end = shift_position(val_date_at, LENGTH_OF_TRANSACTION_DATE_STRING)

      @date_column = { start: date_at, end: remarks_at }
      @remarks_column = { start: remarks_at, end: cheque_at }
      @cheque_column = { start: cheque_at, end: val_date_at }
      @val_date_column = { start: val_date_at, end: val_date_end }
      # The debit column begins where the value date ends and runs to one
      # character past the "debit" label; credit follows immediately.
      @debit_column = { start: val_date_end, end: shift_position(debit_at, 5) }
      @credit_column = { start: shift_position(@debit_column[:end], 1), end: shift_position(credit_at, 7) }
      @balance_column = { start: balance_at, end: -1 }
      @columns = {
        date_column: @date_column,
        remarks_column: @remarks_column,
        cheque_column: @cheque_column,
        val_date_column: @val_date_column,
        debit_column: @debit_column,
        credit_column: @credit_column,
        balance_column: @balance_column
      }

      labels_found = [date_at, remarks_at, cheque_at, val_date_at, debit_at, credit_at, balance_at]
      return invalid_file if labels_found.any?(&:nil?)

      @columns
    end

    # +index+ moved right by +by+ characters, or nil when +index+ is nil.
    def shift_position(index, by)
      index && index + by
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'httmultiparty'
|
2
|
+
|
3
|
+
module NgBankParser
  # Client for a remote service that removes the password from a PDF.
  #
  # NOTE(review): base_uri is plain http, so the document and its password
  # appear to travel unencrypted - confirm whether the service offers https.
  class PDFUnlocker
    include HTTMultiParty
    base_uri 'http://pdf-unlocker.herokuapp.com'

    # @param file the PDF to unlock, sent as the +pdf+ field of the request.
    # @param password the password of the PDF, sent as the +password+ field.
    def initialize(file, password)
      @pdf = file
      @password = password
    end

    # Posts the PDF and its password to /rest/pdf/unlock.
    #
    # @return the parsed response of the request, or nil when the request
    #   raises a StandardError.
    def unlocked_pdf
      payload = { pdf: @pdf, password: @password }
      self.class.post('/rest/pdf/unlock', query: payload, detect_mime_type: true).parsed_response
    rescue StandardError
      nil
    end
  end
end
|