ng-bank-parser 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/.rspec +2 -0
  4. data/CODE_OF_CONDUCT.md +13 -0
  5. data/Gemfile +4 -0
  6. data/Gemfile.lock +80 -0
  7. data/LICENSE +340 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +92 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/ng-bank-parser.rb +11 -0
  14. data/lib/ng-bank-parser/banks.rb +24 -0
  15. data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
  16. data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
  17. data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
  18. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
  19. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
  20. data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
  21. data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
  22. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
  23. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
  24. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
  25. data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
  26. data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
  27. data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
  28. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
  29. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
  30. data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
  31. data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
  32. data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
  33. data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
  34. data/lib/ng-bank-parser/router.rb +44 -0
  35. data/lib/ng-bank-parser/version.rb +3 -0
  36. data/ng-bank-parser.gemspec +31 -0
  37. metadata +198 -0
@@ -0,0 +1,98 @@
1
+ require 'pdf-reader'
2
+ require 'open-uri'
3
+ require_relative 'uba-pdf-parser/transaction_tools'
4
+
5
+
6
module NgBankParser
  # Parses United Bank for Africa (UBA) PDF account statements into a plain hash.
  #
  # Everything is defined on the class object itself (`class << self`), so the
  # parsing state lives in instance variables of the class. Helpers and
  # constants are reached through the extended TransactionTools module, which
  # is why the singleton-class form is kept (constant lookup depends on it).
  class UbaPdf
    extend TransactionTools

    class << self
      # Parses the statement found at +url+.
      #
      # @param url [String] path or URL of the statement; its extension must be
      #   listed in ACCEPTED_FORMATS (compared case-insensitively)
      # @param password [String, nil] accepted for interface compatibility;
      #   this method does not use it
      # @return [Hash] `{ status: 1, data: {...} }` on success, otherwise
      #   `{ status: 0, message: INVALID_FILE_STRING }`
      def parse(url, password = nil)
        return invalid_file unless ACCEPTED_FORMATS.include?(File.extname(url).downcase)
        # Kernel#open spawns a subprocess when the name starts with "|";
        # never let a caller-supplied value reach it in that form.
        return invalid_file if url.to_s.start_with?('|')

        # Block form so the file handle / download is closed when parsing ends.
        open(url) { |file| parse_statement(file) }
      end

      private

      # Runs the whole parse against an already opened IO and builds the result.
      def parse_statement(file)
        # Cleared first so a failed header scan cannot be masked by the columns
        # left behind by an earlier call (state is kept on the class).
        @columns = nil
        @reader = PDF::Reader.new(file)
        set_up_first_page
        return invalid_file unless @columns && is_valid_pdf?

        set_account_details
        set_transactions
        return_payload
      end

      # Caches the first page's text and derives the column positions from the
      # table header line.
      def set_up_first_page
        @first_page_text = @reader.pages.first.text.remove_empty_lines
        set_column_positions(@first_page_text.lines[TABLE_HEADER_LINE_INDEX])
      end

      # Fills in every account-level field of the payload.
      def set_account_details
        set_bank_name
        set_account_name
        set_account_number
        set_dates
        # print_account_details
      end

      # Debug helper: dumps the parsed account details to stdout.
      def print_account_details
        puts @bank_name
        puts @account_name
        puts @account_number
        puts @from_date
        puts @to_date
      end

      def set_bank_name
        @bank_name = "United Bank for Africa"
      end

      # The account name is the text between the two name markers on its line.
      def set_account_name
        account_name_line = @first_page_text.lines[ACCOUNT_NAME_LINE_INDEX]
        @account_name = account_name_line.get_text_between_markers(ACCOUNT_NAME_START_MARKER, ACCOUNT_NAME_END_MARKER)
      end

      # Uses the non-bang case methods: `downcase!`/`upcase!` return nil when
      # the string is unchanged, which turned a digits-only account number
      # into nil.
      def set_account_number
        account_number_line = @first_page_text.lines[ACCOUNT_NUMBER_LINE_INDEX]
        normalized = account_number_line.remove_white_spaces.downcase
        number = normalized.get_text_between_markers(ACCOUNT_NUMBER_STRING, CURRENCY_STRING)
        @account_number = number && number.upcase
      end

      # Reads the statement period from the second line of the first page.
      def set_dates
        normalized = @first_page_text.lines[1].remove_white_spaces.downcase
        @from_date = normalized.get_text_between_markers(START_DATE_STRING, END_DATE_STRING).upcase.convert_to_date
        @to_date = normalized.get_text_after_marker(END_DATE_STRING).upcase.convert_to_date
      end

      # Success response handed back to the caller of `parse`.
      def return_payload
        {
          status: 1,
          data: {
            bank_name: @bank_name,
            account_number: @account_number,
            account_name: @account_name,
            from_date: @from_date,
            to_date: @to_date,
            transactions: @transactions_hashes_array
          }
        }
      end

      # Failure response used for every rejected file.
      def invalid_file
        { status: 0, message: INVALID_FILE_STRING }
      end
    end
  end
end
97
+
98
+
@@ -0,0 +1,56 @@
1
# Core-class extensions used by the UBA PDF parser for slicing and cleaning
# the text extracted from a statement.
class String
  # Returns the slice described by +column+, a hash with inclusive :start and
  # :end character indexes. Result is whatever String#[] returns for that range.
  def extract_column(column)
    self[column[:start]..column[:end]]
  end

  # Drops lines that contain nothing but their newline.
  def remove_empty_lines
    gsub(/^$\n/, '')
  end

  # Removes every newline character. The original used the single-quoted
  # '\n', which is a literal backslash followed by "n" and never matched.
  def remove_multiple_lines
    gsub("\n", '')
  end

  # Removes all whitespace, including newlines.
  def remove_white_spaces
    gsub(/\s+/, '')
  end

  # Collapses each run of whitespace into a single space.
  def reduce_to_singular_white_space
    gsub(/\s+/, ' ')
  end

  # Removes thousands separators so the text can be fed to `to_f`.
  def remove_commas
    gsub(/,/, '')
  end

  # Returns the shortest text found between +marker1+ and +marker2+
  # (newlines allowed), or nil when the pair does not occur.
  def get_text_between_markers(marker1, marker2)
    self[/#{Regexp.escape(marker1)}(.*?)#{Regexp.escape(marker2)}/m, 1]
  end

  # Returns everything after the first occurrence of +marker+, or an empty
  # string when the marker is absent.
  def get_text_after_marker(marker)
    partition(marker).last
  end

  # Returns the first ten characters, where the callers look for a date.
  def get_date_string
    self[0...10]
  end

  # Returns the first line (with its newline), or nil for an empty string.
  def get_first_line
    lines[0]
  end

  # True when Date.parse accepts the string.
  def is_date?
    Date.parse(self)
    true
  rescue ArgumentError
    false
  end

  # Converts text such as "01-JAN-2015" to a Date; raises ArgumentError when
  # the text does not match the day-month abbreviation-year format.
  def convert_to_date
    Date.strptime(self, '%d-%b-%Y')
  end
end
@@ -0,0 +1,73 @@
1
+ require 'active_support/json/decoding.rb'
2
+ require 'active_support/json/encoding.rb'
3
+
4
+
5
# One statement row, built from the raw text of a transaction (its dated line
# plus any continuation lines) and the column positions of the table.
class Transaction
  TYPE = {
    debit: 'debit',
    credit: 'credit'
  }.freeze

  # @param string [String] raw transaction text; the first line carries the
  #   date and the amounts, any further lines only add to the remarks
  # @param columns [Hash] column descriptors keyed by :debit_column,
  #   :credit_column, :balance_column and :remarks_column, each a hash with
  #   :start and :end indexes
  # @raise [ArgumentError] when the first ten characters are not a
  #   day-month-year date in dd-mm-yyyy form
  def initialize(string, columns)
    set_date_from_transaction_string(string)
    set_amount_and_type_from_transaction_string(string, columns[:debit_column], columns[:credit_column])
    set_balance_from_transaction_string(string, columns[:balance_column])
    set_remarks_from_transaction_string(string, columns[:remarks_column])
    set_ref
  end

  # @return [Hash] the transaction as plain data
  def to_hash
    {
      date: @date,
      amount: @amount,
      type: @type,
      balance: @balance,
      remarks: @remarks,
      ref: @ref
    }
  end

  private

  def set_date_from_transaction_string(string)
    @date = Date.strptime(string.get_first_line.get_date_string, '%d-%m-%Y')
  end

  # Exactly one of the debit/credit cells is expected to be filled. When both
  # or neither are, the type and amount are left nil, as before.
  def set_amount_and_type_from_transaction_string(string, debit_column, credit_column)
    first_line = string.get_first_line
    debit = column_text(first_line, debit_column).remove_white_spaces
    credit = column_text(first_line, credit_column).remove_white_spaces

    # Whitespace is already stripped, so an empty check is all that is needed.
    if debit.empty? && !credit.empty?
      @type = TYPE[:credit]
      @amount = credit.remove_commas.to_f
    elsif credit.empty? && !debit.empty?
      @type = TYPE[:debit]
      @amount = debit.remove_commas.to_f
    end
  end

  def set_balance_from_transaction_string(string, balance_column)
    @balance = column_text(string.get_first_line, balance_column).remove_white_spaces.remove_commas.to_f
  end

  # Joins the remarks cell of every line and collapses the whitespace.
  def set_remarks_from_transaction_string(string, remarks_column)
    pieces = string.lines.map { |line| column_text(line, remarks_column) }
    @remarks = pieces.join.reduce_to_singular_white_space
  end

  def set_ref
    @ref = '0'
  end

  # Text of one column, or an empty string when the line ends before the
  # column starts (String#[] returns nil there, which used to raise).
  def column_text(line, column)
    line.extract_column(column).to_s
  end
end
73
+
@@ -0,0 +1,32 @@
1
module NgBankParser
  # Layout facts and marker strings for UBA PDF statements. Every value is
  # frozen so that shared constants cannot be altered by a caller.
  module Constants
    # File extensions the parser accepts.
    ACCEPTED_FORMATS = ['.pdf'].freeze

    # Line positions within a page's text once empty lines are removed.
    TABLE_HEADER_LINE_INDEX = 6
    LENGTH_OF_TRANSACTION_DATE_STRING = 10
    ACCOUNT_NAME_LINE_INDEX = 2
    ACCOUNT_NUMBER_LINE_INDEX = 5
    RANGE_OF_LINES_WITH_TRANSACTIONS = (7..-3).freeze

    # Markers matched against lines that were stripped of whitespace and
    # lower-cased, hence no spaces or capitals.
    FIRST_LINE_CHECKER_STRING = 'statementoftransactions'.freeze
    START_DATE_STRING = 'startdate:'.freeze
    END_DATE_STRING = 'enddate:'.freeze
    ACCOUNT_NUMBER_STRING = 'accountnumber:'.freeze
    CURRENCY_STRING = 'currency'.freeze
    # Markers matched against the raw account-name line.
    ACCOUNT_NAME_START_MARKER = '( '.freeze
    ACCOUNT_NAME_END_MARKER = ' )'.freeze

    # Lower-cased header labels of the transaction table.
    DATE_COLUMN_HEADER_STRING = 'date'.freeze
    REMARKS_COLUMN_HEADER_STRING = 'description'.freeze
    CHEQUE_NUMBER_COLUMN_HEADER_STRING = 'chq. no'.freeze
    VAL_DATE_COLUMN_HEADER_STRING = 'val date'.freeze
    DEBIT_COLUMN_HEADER_STRING = 'debit'.freeze
    CREDIT_COLUMN_HEADER_STRING = 'credit'.freeze
    BALANCE_COLUMN_HEADER_STRING = 'balance'.freeze

    # Slice that drops the last character of an extracted header cell.
    COLUMN_OFFSET_RANGE = (0..-2).freeze
    # Message returned with a status of 0.
    INVALID_FILE_STRING = 'Invalid File'.freeze
  end
end
@@ -0,0 +1,58 @@
1
+ require_relative 'constants'
2
+
3
module NgBankParser
  # Structural checks that decide whether the first page looks like a UBA
  # statement. They read @first_page_text and the @*_column descriptors set by
  # the including object.
  #
  # The non-bang `downcase`/`upcase` are used throughout: the bang variants
  # return nil when the text is already in the requested case, which made the
  # chained calls raise NoMethodError.
  module PdfChecks
    include Constants

    private

    # @return [Boolean] true when every header and column check passes
    def is_valid_pdf?
      is_first_line_of_first_page_correct? &&
        is_date_range_line_correct? &&
        is_account_number_line_correct? &&
        are_columns_correct?
    end

    def is_first_line_of_first_page_correct?
      normalized_first_page_line(0).end_with?(FIRST_LINE_CHECKER_STRING)
    end

    # Both the start and the end date must be parseable. The end date is the
    # text after END_DATE_STRING (the original looked after START_DATE_STRING,
    # so the end date itself was never examined).
    def is_date_range_line_correct?
      date_line = normalized_first_page_line(1)
      start_text = date_line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING).to_s
      end_text = date_line.get_text_after_marker(END_DATE_STRING)
      start_text.upcase.is_date? && end_text.upcase.is_date?
    end

    def is_account_number_line_correct?
      normalized_first_page_line(ACCOUNT_NUMBER_LINE_INDEX).start_with?(ACCOUNT_NUMBER_STRING)
    end

    def are_columns_correct?
      is_date_column_correct? &&
        is_remarks_column_correct? &&
        is_debit_column_correct? &&
        is_credit_column_correct? &&
        is_balance_column_correct?
    end

    # The date and remarks cells are compared without their last character
    # (COLUMN_OFFSET_RANGE); the other cells are compared whole.
    def is_date_column_correct?
      normalized_header_cell(@date_column)[COLUMN_OFFSET_RANGE] == DATE_COLUMN_HEADER_STRING
    end

    def is_remarks_column_correct?
      normalized_header_cell(@remarks_column)[COLUMN_OFFSET_RANGE] == REMARKS_COLUMN_HEADER_STRING
    end

    def is_debit_column_correct?
      normalized_header_cell(@debit_column) == DEBIT_COLUMN_HEADER_STRING
    end

    def is_credit_column_correct?
      normalized_header_cell(@credit_column) == CREDIT_COLUMN_HEADER_STRING
    end

    def is_balance_column_correct?
      normalized_header_cell(@balance_column) == BALANCE_COLUMN_HEADER_STRING
    end

    # Line +index+ of the first page with whitespace removed and lower-cased;
    # an empty string when the page has no such line.
    def normalized_first_page_line(index)
      @first_page_text.lines[index].to_s.remove_white_spaces.downcase
    end

    # The table-header text inside +column+, whitespace removed and lower-cased.
    def normalized_header_cell(column)
      header_line = @first_page_text.lines[TABLE_HEADER_LINE_INDEX].to_s
      header_line.extract_column(column).to_s.remove_white_spaces.downcase
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require 'date'
2
+ require 'active_support/core_ext/object/blank'
3
+ require_relative 'classes/string'
4
+ require_relative 'classes/transaction'
5
+ require_relative 'pdf_checks'
6
+
7
+
8
module NgBankParser
  # Turns the text of the statement pages into Transaction objects and works
  # out where each table column sits. Reads @reader and writes its results to
  # instance variables of the including/extending object.
  module TransactionTools
    include PdfChecks

    private

    # Builds @transactions and @transactions_hashes_array from @reader.
    #
    # The collections are rebuilt from scratch on every call. They used to be
    # initialised with `||=`, and because the state lives on a long-lived
    # object, rows from an earlier statement leaked into the next result.
    def set_transactions
      @transactions = []
      @transaction_strings = []
      # NOTE(review): the last page is skipped on purpose by the original
      # code, presumably because it holds no transaction rows; confirm.
      @reader.pages[0..-2].each do |page|
        seperate_lines_into_transaction_strings(page.text.remove_empty_lines.lines)
      end
      @transaction_strings.compact!
      @transactions = @transaction_strings.map do |transaction_string|
        Transaction.new(transaction_string, @columns)
      end
      # The first parsed row is discarded, as in the original implementation.
      @transactions.shift
      set_transactions_hashes_array
    end

    # Mirrors @transactions as an array of plain hashes.
    def set_transactions_hashes_array
      @transactions_hashes_array = @transactions.map(&:to_hash)
    end

    # Groups a page's lines into one string per transaction: each line that
    # starts with a date opens a transaction and the undated lines that follow
    # are appended to it. Lines before the first dated line are ignored.
    #
    # @param lines [Array<String>] the page's lines, empty lines removed
    def seperate_lines_into_transaction_strings(lines)
      # Slicing past the end of a short page yields nil; treat it as no rows.
      @lines = lines[RANGE_OF_LINES_WITH_TRANSACTIONS] || []
      dated_onwards = @lines.drop_while { |line| !line_has_date?(line) }
      dated_onwards.slice_before { |line| line_has_date?(line) }.each do |group|
        @transaction_strings << group.join
      end
    end

    # True when the first ten characters of +line+ parse as a date.
    def line_has_date?(line)
      line.get_date_string.is_date?
    end

    # Lines of the current page that come after position +index+.
    def get_next_lines(index)
      @lines[index + 1..-1]
    end

    # Derives the character range of every table column from the header line
    # and stores them in @*_column and @columns.
    #
    # @param column_header_line [String, nil] the table header line
    # @return [Hash] @columns on success; the result of `invalid_file` (which
    #   the including object must provide) when a header needed for the
    #   arithmetic below is missing. @columns stays nil in that case.
    def set_column_positions(column_header_line)
      # Never report the columns of a previously parsed statement.
      @columns = nil
      header = column_header_line.downcase

      date_start = header.index(DATE_COLUMN_HEADER_STRING)
      remarks_start = header.index(REMARKS_COLUMN_HEADER_STRING)
      cheque_start = header.index(CHEQUE_NUMBER_COLUMN_HEADER_STRING)
      val_date_start = header.index(VAL_DATE_COLUMN_HEADER_STRING)

      @date_column = { start: date_start, end: remarks_start }
      @remarks_column = { start: remarks_start, end: cheque_start }
      @cheque_column = { start: cheque_start, end: val_date_start }
      @val_date_column = {
        start: val_date_start,
        end: val_date_start + LENGTH_OF_TRANSACTION_DATE_STRING
      }
      # The +5 / +7 offsets are carried over unchanged from the original;
      # presumably they are sized to the header words, confirm before tuning.
      @debit_column = {
        start: @val_date_column[:start] + LENGTH_OF_TRANSACTION_DATE_STRING,
        end: header.index(DEBIT_COLUMN_HEADER_STRING) + 5
      }
      @credit_column = {
        start: @debit_column[:end] + 1,
        end: header.index(CREDIT_COLUMN_HEADER_STRING) + 7
      }
      @balance_column = {
        start: header.index(BALANCE_COLUMN_HEADER_STRING),
        end: -1
      }
      @columns = {
        date_column: @date_column,
        remarks_column: @remarks_column,
        cheque_column: @cheque_column,
        val_date_column: @val_date_column,
        debit_column: @debit_column,
        credit_column: @credit_column,
        balance_column: @balance_column
      }
    rescue NoMethodError
      # Raised when the header line is nil or a header used in the arithmetic
      # above was not found (nil + Integer).
      invalid_file
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'httmultiparty'
2
+
3
module NgBankParser
  # Client for a remote PDF unlocking service: posts a PDF together with its
  # password and hands back whatever the service replies.
  class PDFUnlocker
    include HTTMultiParty
    base_uri 'http://pdf-unlocker.herokuapp.com'

    # @param file the PDF to unlock, passed through to the request as-is
    # @param password the password sent alongside the PDF
    def initialize(file, password)
      @pdf = file
      @password = password
    end

    # Posts the PDF and password to the unlock endpoint.
    #
    # @return the parsed response body, or nil when the request raises any
    #   StandardError
    def unlocked_pdf
      payload = { pdf: @pdf, password: @password }
      self.class.post('/rest/pdf/unlock', query: payload, detect_mime_type: true).parsed_response
    rescue StandardError
      nil
    end
  end
end