ng-bank-parser 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +35 -0
- data/.rspec +2 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +80 -0
- data/LICENSE +340 -0
- data/LICENSE.txt +21 -0
- data/README.md +92 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/ng-bank-parser.rb +11 -0
- data/lib/ng-bank-parser/banks.rb +24 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
- data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
- data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
- data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
- data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
- data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
- data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
- data/lib/ng-bank-parser/router.rb +44 -0
- data/lib/ng-bank-parser/version.rb +3 -0
- data/ng-bank-parser.gemspec +31 -0
- metadata +198 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
require 'pdf-reader'
|
2
|
+
require 'open-uri'
|
3
|
+
require_relative 'uba-pdf-parser/transaction_tools'
|
4
|
+
|
5
|
+
|
6
|
+
module NgBankParser
  # Parser for United Bank for Africa (UBA) PDF account statements.
  #
  # Everything is defined on the singleton class, so parsing state
  # (@reader, @first_page_text, the account details, ...) lives in instance
  # variables of the UbaPdf class object itself. UbaPdf.parse is the only
  # public entry point.
  class UbaPdf
    extend TransactionTools

    class << self
      # Parses the statement located at +url+.
      #
      # @param url [String] location of the statement; its extension (as
      #   reported by File.extname) must be listed in ACCEPTED_FORMATS
      # @param password [String, nil] accepted for interface compatibility;
      #   this method does not use it
      # @return [Hash] { status: 1, data: {...} } when the statement passes
      #   validation, otherwise { status: 0, message: INVALID_FILE_STRING }
      def parse(url, password = nil)
        return invalid_file unless ACCEPTED_FORMATS.include?(File.extname(url))

        # NOTE(review): Kernel#open treats a leading "|" in a caller-supplied
        # string as a command to run, and on Ruby 3.0+ open-uri no longer
        # hooks Kernel#open for URLs. Consider URI.open once the supported
        # Ruby versions are confirmed.
        file = open(url)
        @reader = PDF::Reader.new(file)
        set_up_first_page
        return invalid_file unless is_valid_pdf?

        set_account_details
        set_transactions
        return_payload
      ensure
        # The payload only holds plain values at this point, so the
        # underlying file/tempfile can be released.
        file.close if file.respond_to?(:close)
      end

      private

      # Caches the first page's text (blank lines removed) and derives the
      # column positions from its table header line.
      def set_up_first_page
        @first_page_text = @reader.pages.first.text.remove_empty_lines
        set_column_positions(@first_page_text.lines[TABLE_HEADER_LINE_INDEX])
      end

      # Populates every account-level field of the payload.
      def set_account_details
        set_bank_name
        set_account_name
        set_account_number
        set_dates
      end

      # Debug helper: prints the account-level fields to stdout.
      def print_account_details
        puts @bank_name
        puts @account_name
        puts @account_number
        puts @from_date
        puts @to_date
      end

      def set_bank_name
        @bank_name = "United Bank for Africa"
      end

      # Reads the account name found between the name markers on the
      # account-name line of the first page.
      def set_account_name
        name_line = @first_page_text.lines[ACCOUNT_NAME_LINE_INDEX]
        @account_name = name_line.get_text_between_markers(ACCOUNT_NAME_START_MARKER, ACCOUNT_NAME_END_MARKER)
      end

      # Reads the account number found between ACCOUNT_NUMBER_STRING and
      # CURRENCY_STRING on the whitespace-stripped, lower-cased line.
      #
      # Non-bang downcase/upcase are used on purpose: the bang variants
      # return nil when the string is unchanged, which turned an all-digit
      # account number into nil.
      def set_account_number
        number_line = @first_page_text.lines[ACCOUNT_NUMBER_LINE_INDEX]
        normalized = number_line.remove_white_spaces.downcase
        number = normalized.get_text_between_markers(ACCOUNT_NUMBER_STRING, CURRENCY_STRING)
        @account_number = number && number.upcase
      end

      # Reads the statement period from the second line of the first page.
      def set_dates
        normalized = @first_page_text.lines[1].remove_white_spaces.downcase
        from_text = normalized.get_text_between_markers(START_DATE_STRING, END_DATE_STRING)
        to_text = normalized.get_text_after_marker(END_DATE_STRING)
        # Month abbreviations were lower-cased above; restore upper case
        # before handing the text to convert_to_date.
        @from_date = from_text.upcase.convert_to_date
        @to_date = to_text.upcase.convert_to_date
      end

      # Success payload returned by parse.
      def return_payload
        {
          status: 1,
          data: {
            bank_name: @bank_name,
            account_number: @account_number,
            account_name: @account_name,
            from_date: @from_date,
            to_date: @to_date,
            transactions: @transactions_hashes_array
          }
        }
      end

      # Failure payload returned when the file is rejected.
      def invalid_file
        { status: 0, message: INVALID_FILE_STRING }
      end
    end
  end
end
|
97
|
+
|
98
|
+
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# Core-class extensions used by the UBA PDF parser for slicing and
# normalising statement text.
class String
  # Returns the slice from column[:start] to column[:end], both inclusive.
  #
  # @param column [Hash] hash with :start and :end character indexes
  # @return [String, nil] nil when the start index lies beyond the string
  def extract_column(column)
    self[column[:start]..column[:end]]
  end

  # Returns a copy with completely empty lines removed.
  def remove_empty_lines
    gsub(/^$\n/, '')
  end

  # Returns a copy with every newline character removed.
  #
  # The pattern must be double-quoted: '\n' in single quotes is a literal
  # backslash followed by "n" and never matches a line break.
  def remove_multiple_lines
    gsub("\n", '')
  end

  # Returns a copy with all whitespace removed.
  def remove_white_spaces
    gsub(/\s+/, "")
  end

  # Returns a copy with each run of whitespace collapsed to one space.
  def reduce_to_singular_white_space
    gsub(/\s+/, " ")
  end

  # Returns a copy with all commas removed (e.g. thousands separators).
  def remove_commas
    gsub(/,/, '')
  end

  # Returns the shortest text found between +marker1+ and +marker2+
  # (matching across line breaks), or nil when the pair is not present.
  def get_text_between_markers(marker1, marker2)
    self[/#{Regexp.escape(marker1)}(.*?)#{Regexp.escape(marker2)}/m, 1]
  end

  # Returns everything after the first occurrence of +marker+; an empty
  # string when the marker does not occur.
  def get_text_after_marker(marker)
    partition(marker).last
  end

  # Returns the first ten characters of the string.
  def get_date_string
    self[0...10]
  end

  # Returns the first line (including its line terminator), or nil for an
  # empty string.
  def get_first_line
    lines[0]
  end

  # True when Date.parse accepts the string.
  def is_date?
    Date.parse(self)
    true
  rescue ArgumentError
    false
  end

  # Parses the string as a '%d-%b-%Y' date (e.g. "01-JAN-2015").
  #
  # @raise [ArgumentError] when the string does not match that format
  def convert_to_date
    Date.strptime(self, '%d-%b-%Y')
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'active_support/json/decoding.rb'
|
2
|
+
require 'active_support/json/encoding.rb'
|
3
|
+
|
4
|
+
|
5
|
+
# One statement row, built from the raw text of a transaction (its dated
# first line plus any continuation lines) and the column positions detected
# in the table header.
class Transaction
  TYPE = {
    debit: 'debit',
    credit: 'credit'
  }

  # @param string [String] raw transaction text; the first line carries the
  #   date, amounts and balance, later lines only contribute to the remarks
  # @param columns [Hash] column position hashes keyed by :debit_column,
  #   :credit_column, :balance_column and :remarks_column
  def initialize(string, columns)
    set_date_from_transaction_string(string)
    set_amount_and_type_from_transaction_string(string, columns[:debit_column], columns[:credit_column])
    set_balance_from_transaction_string(string, columns[:balance_column])
    set_remarks_from_transaction_string(string, columns[:remarks_column])
    set_ref
  end

  # @return [Hash] plain-hash view of the transaction
  def to_hash
    {
      date: @date,
      amount: @amount,
      type: @type,
      balance: @balance,
      remarks: @remarks,
      ref: @ref
    }
  end

  private

  # The date is the leading ten characters of the first line, in
  # day-month-year numeric form.
  def set_date_from_transaction_string(string)
    date_text = string.get_first_line.get_date_string
    @date = Date.strptime(date_text, '%d-%m-%Y')
  end

  # Exactly one of the debit/credit cells is expected to be filled; that
  # cell decides both the type and the amount. When both or neither are
  # filled, @type and @amount are left unset (nil).
  def set_amount_and_type_from_transaction_string(string, debit_column, credit_column)
    first_line = string.get_first_line
    debit = first_line.extract_column(debit_column).remove_white_spaces
    credit = first_line.extract_column(credit_column).remove_white_spaces

    if debit.blank? && !credit.blank?
      @type = TYPE[:credit]
      @amount = credit.remove_commas.to_f
    elsif credit.blank? && !debit.blank?
      @type = TYPE[:debit]
      @amount = debit.remove_commas.to_f
    end
  end

  def set_balance_from_transaction_string(string, balance_column)
    balance_text = string.get_first_line.extract_column(balance_column)
    @balance = balance_text.remove_white_spaces.remove_commas.to_f
  end

  # Joins the remarks cell of every line of the transaction and collapses
  # the whitespace between the pieces.
  def set_remarks_from_transaction_string(string, remarks_column)
    # extract_column returns nil for a line that ends before the remarks
    # column starts; to_s keeps such a line from raising on concatenation.
    pieces = string.lines.map { |line| line.extract_column(remarks_column).to_s }
    @remarks = pieces.join.reduce_to_singular_white_space
  end

  # No reference is extracted from the statement; a fixed placeholder is
  # stored instead.
  def set_ref
    @ref = '0'
  end
end
|
73
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module NgBankParser
  # Layout constants for UBA PDF statements. String and Array values are
  # frozen so that a caller mutating one of them (for example the message
  # returned in the invalid-file payload) cannot alter it for everyone else.
  module Constants
    # File extensions accepted by the parser (compared with File.extname).
    ACCEPTED_FORMATS = ['.pdf'.freeze].freeze

    # Line positions within the first page's text (blank lines removed).
    TABLE_HEADER_LINE_INDEX = 6
    LENGTH_OF_TRANSACTION_DATE_STRING = 10
    ACCOUNT_NAME_LINE_INDEX = 2
    ACCOUNT_NUMBER_LINE_INDEX = 5
    # Slice of a page's lines that is scanned for transactions.
    RANGE_OF_LINES_WITH_TRANSACTIONS = (7..-3).freeze

    # Markers matched against whitespace-stripped, lower-cased lines.
    FIRST_LINE_CHECKER_STRING = 'statementoftransactions'.freeze
    START_DATE_STRING = 'startdate:'.freeze
    END_DATE_STRING = 'enddate:'.freeze
    ACCOUNT_NUMBER_STRING = 'accountnumber:'.freeze
    CURRENCY_STRING = 'currency'.freeze
    # Markers matched against the raw account-name line.
    ACCOUNT_NAME_START_MARKER = '( '.freeze
    ACCOUNT_NAME_END_MARKER = ' )'.freeze

    # Lower-cased column titles looked up in the table header line.
    DATE_COLUMN_HEADER_STRING = 'date'.freeze
    REMARKS_COLUMN_HEADER_STRING = 'description'.freeze
    CHEQUE_NUMBER_COLUMN_HEADER_STRING = 'chq. no'.freeze
    VAL_DATE_COLUMN_HEADER_STRING = 'val date'.freeze
    DEBIT_COLUMN_HEADER_STRING = 'debit'.freeze
    CREDIT_COLUMN_HEADER_STRING = 'credit'.freeze
    BALANCE_COLUMN_HEADER_STRING = 'balance'.freeze

    # Drops the last character of an extracted header cell before comparing.
    COLUMN_OFFSET_RANGE = (0..-2).freeze
    INVALID_FILE_STRING = 'Invalid File'.freeze
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative 'constants'
|
2
|
+
|
3
|
+
module NgBankParser
  # Structural checks that decide whether the first page of a PDF looks like
  # a UBA statement. The checks read @first_page_text and the column
  # position hashes (@date_column, @remarks_column, ...) set by the host.
  #
  # Every check answers with true/false: a missing line, marker or column
  # position counts as "not valid" instead of raising.
  module PdfChecks
    include Constants

    private

    # True when the heading, date range, account number line and table
    # header all match the expected layout.
    def is_valid_pdf?
      is_first_line_of_first_page_correct? &&
        is_date_range_line_correct? &&
        is_account_number_line_correct? &&
        are_columns_correct?
    end

    def is_first_line_of_first_page_correct?
      squashed_first_page_line(0).end_with?(FIRST_LINE_CHECKER_STRING)
    end

    # Both the start date (between the two markers) and the end date (after
    # the end marker) must be parseable dates.
    def is_date_range_line_correct?
      date_line = squashed_first_page_line(1)
      from_text = date_line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING)
      return false if from_text.nil?

      # The end date follows END_DATE_STRING, the same marker the parser
      # later uses to read it.
      to_text = date_line.get_text_after_marker(END_DATE_STRING)
      from_text.upcase.is_date? && to_text.upcase.is_date?
    end

    def is_account_number_line_correct?
      squashed_first_page_line(ACCOUNT_NUMBER_LINE_INDEX).start_with?(ACCOUNT_NUMBER_STRING)
    end

    def are_columns_correct?
      is_date_column_correct? &&
        is_remarks_column_correct? &&
        is_debit_column_correct? &&
        is_credit_column_correct? &&
        is_balance_column_correct?
    end

    # The date and remarks cells are sliced up to and including the first
    # character of the next column title, hence COLUMN_OFFSET_RANGE.
    def is_date_column_correct?
      header_cell(@date_column)[COLUMN_OFFSET_RANGE] == DATE_COLUMN_HEADER_STRING
    end

    def is_remarks_column_correct?
      header_cell(@remarks_column)[COLUMN_OFFSET_RANGE] == REMARKS_COLUMN_HEADER_STRING
    end

    def is_debit_column_correct?
      header_cell(@debit_column) == DEBIT_COLUMN_HEADER_STRING
    end

    def is_credit_column_correct?
      header_cell(@credit_column) == CREDIT_COLUMN_HEADER_STRING
    end

    def is_balance_column_correct?
      header_cell(@balance_column) == BALANCE_COLUMN_HEADER_STRING
    end

    # Returns the given first-page line with whitespace removed and
    # lower-cased; an empty string when the page has no such line.
    # Non-bang downcase is deliberate: downcase! returns nil when the text
    # is already lower case.
    def squashed_first_page_line(index)
      @first_page_text.lines[index].to_s.remove_white_spaces.downcase
    end

    # Returns the normalised text of the table header under +column+; an
    # empty string when the column position is unknown or incomplete.
    def header_cell(column)
      return '' unless column && column[:start] && column[:end]

      header_line = @first_page_text.lines[TABLE_HEADER_LINE_INDEX].to_s
      header_line.extract_column(column).to_s.remove_white_spaces.downcase
    end
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'active_support/core_ext/object/blank'
|
3
|
+
require_relative 'classes/string'
|
4
|
+
require_relative 'classes/transaction'
|
5
|
+
require_relative 'pdf_checks'
|
6
|
+
|
7
|
+
|
8
|
+
module NgBankParser
  # Transaction extraction for UBA statements: splits page text into
  # per-transaction strings, turns them into Transaction objects and works
  # out the character positions of the table columns.
  #
  # The host is expected to provide @reader (a PDF reader exposing #pages)
  # and an #invalid_file method.
  module TransactionTools
    include PdfChecks

    private

    # Builds @transactions and @transactions_hashes_array from every page
    # except the last one.
    #
    # State is rebuilt from scratch on each call; the host keeps it on a
    # long-lived object, so reusing the previous arrays would append the new
    # statement's rows to the old ones.
    def set_transactions
      @transaction_strings = []
      @reader.pages[0..-2].each do |page|
        seperate_lines_into_transaction_strings(page.text.remove_empty_lines.lines)
      end
      @transaction_strings.compact!
      @transactions = @transaction_strings.map do |transaction_string|
        Transaction.new(transaction_string, @columns)
      end
      # The first parsed row is discarded.
      # NOTE(review): presumably an opening-balance row — confirm.
      @transactions.shift
      set_transactions_hashes_array
    end

    # Stores the hash form of every transaction.
    def set_transactions_hashes_array
      @transactions_hashes_array = @transactions.map(&:to_hash)
    end

    # Groups a page's transaction lines: each line starting with a date
    # opens a new transaction string and the following undated lines are
    # appended to it. Undated lines before the first dated line are ignored.
    #
    # @param lines [Array<String>] all lines of one page
    def seperate_lines_into_transaction_strings(lines)
      @transaction_strings ||= []
      # The slice is nil when the page has fewer lines than the range start.
      @lines = lines[RANGE_OF_LINES_WITH_TRANSACTIONS] || []

      current = nil
      @lines.each do |line|
        if line_has_date?(line)
          @transaction_strings << current if current
          current = line.dup
        elsif current
          current << line
        end
      end
      @transaction_strings << current if current
    end

    def line_has_date?(line)
      line.get_date_string.is_date?
    end

    # Lines of the current page that follow position +index+.
    def get_next_lines(index)
      @lines[index+1..-1]
    end

    # Derives the start/end character index of every table column from the
    # header line and stores them in @*_column and @columns.
    #
    # @param column_header_line [String, nil] the table header line
    # @return [Hash] @columns on success, or the host's invalid_file payload
    #   when the line is missing or lacks the value-date, debit or credit
    #   title
    def set_column_positions(column_header_line)
      # Forget positions from a previous statement so a failed detection
      # cannot silently fall back to them.
      @date_column = @remarks_column = @cheque_column = @val_date_column = nil
      @debit_column = @credit_column = @balance_column = @columns = nil

      header = column_header_line.downcase
      date_start = header.index(DATE_COLUMN_HEADER_STRING)
      remarks_start = header.index(REMARKS_COLUMN_HEADER_STRING)
      cheque_start = header.index(CHEQUE_NUMBER_COLUMN_HEADER_STRING)
      val_date_start = header.index(VAL_DATE_COLUMN_HEADER_STRING)
      debit_start = header.index(DEBIT_COLUMN_HEADER_STRING)
      credit_start = header.index(CREDIT_COLUMN_HEADER_STRING)
      balance_start = header.index(BALANCE_COLUMN_HEADER_STRING)

      @date_column = { start: date_start, end: remarks_start }
      @remarks_column = { start: remarks_start, end: cheque_start }
      @cheque_column = { start: cheque_start, end: val_date_start }
      @val_date_column = {
        start: val_date_start,
        end: val_date_start + LENGTH_OF_TRANSACTION_DATE_STRING
      }
      # The debit cell starts right after the value date.
      @debit_column = {
        start: @val_date_column[:start] + LENGTH_OF_TRANSACTION_DATE_STRING,
        end: debit_start + 5
      }
      @credit_column = {
        start: @debit_column[:end] + 1,
        end: credit_start + 7
      }
      @balance_column = { start: balance_start, end: -1 }

      @columns = {
        date_column: @date_column,
        remarks_column: @remarks_column,
        cheque_column: @cheque_column,
        val_date_column: @val_date_column,
        debit_column: @debit_column,
        credit_column: @credit_column,
        balance_column: @balance_column
      }
    rescue NoMethodError
      # Raised when the header line is nil or a required title was not
      # found (nil + Integer).
      invalid_file
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'httmultiparty'
|
2
|
+
|
3
|
+
module NgBankParser
  # Client for a remote service that removes the password from a PDF.
  class PDFUnlocker
    include HTTMultiParty
    # HTTPS is used because the request carries the statement itself and
    # its password.
    base_uri 'https://pdf-unlocker.herokuapp.com'

    # @param file the PDF to upload
    # @param password the password protecting +file+
    def initialize(file, password)
      @pdf = file
      @password = password
    end

    # Posts the PDF and password to the unlock endpoint.
    #
    # @return the parsed response body, or nil when the request raises any
    #   StandardError (best-effort: failures are not reported to the caller)
    def unlocked_pdf
      options = { :pdf => @pdf, :password => @password }
      self.class.post('/rest/pdf/unlock', :query => options, :detect_mime_type => true).parsed_response
    rescue StandardError
      nil
    end
  end
end
|