ng-bank-parser 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +35 -0
  3. data/.rspec +2 -0
  4. data/CODE_OF_CONDUCT.md +13 -0
  5. data/Gemfile +4 -0
  6. data/Gemfile.lock +80 -0
  7. data/LICENSE +340 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +92 -0
  10. data/Rakefile +6 -0
  11. data/bin/console +14 -0
  12. data/bin/setup +7 -0
  13. data/lib/ng-bank-parser.rb +11 -0
  14. data/lib/ng-bank-parser/banks.rb +24 -0
  15. data/lib/ng-bank-parser/fixtures/firstbank-pdf-invalid.xlsx +0 -0
  16. data/lib/ng-bank-parser/fixtures/firstbank-pdf-valid.pdf +0 -0
  17. data/lib/ng-bank-parser/fixtures/gtb-excel-invalid.pdf +0 -0
  18. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xls +1332 -0
  19. data/lib/ng-bank-parser/fixtures/gtb-excel-valid.xlsx +0 -0
  20. data/lib/ng-bank-parser/fixtures/uba-pdf-invalid.pdf +0 -0
  21. data/lib/ng-bank-parser/fixtures/uba-pdf-valid.pdf +0 -0
  22. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser.rb +81 -0
  23. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/helpers.rb +172 -0
  24. data/lib/ng-bank-parser/parsers/firstbank-pdf-parser/statement_utils.rb +51 -0
  25. data/lib/ng-bank-parser/parsers/gtb-excel-parser.rb +66 -0
  26. data/lib/ng-bank-parser/parsers/gtb-excel-parser/helpers.rb +121 -0
  27. data/lib/ng-bank-parser/parsers/uba-pdf-parser.rb +98 -0
  28. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/string.rb +56 -0
  29. data/lib/ng-bank-parser/parsers/uba-pdf-parser/classes/transaction.rb +73 -0
  30. data/lib/ng-bank-parser/parsers/uba-pdf-parser/constants.rb +32 -0
  31. data/lib/ng-bank-parser/parsers/uba-pdf-parser/pdf_checks.rb +58 -0
  32. data/lib/ng-bank-parser/parsers/uba-pdf-parser/transaction_tools.rb +110 -0
  33. data/lib/ng-bank-parser/pdf-unlocker.rb +24 -0
  34. data/lib/ng-bank-parser/router.rb +44 -0
  35. data/lib/ng-bank-parser/version.rb +3 -0
  36. data/ng-bank-parser.gemspec +31 -0
  37. metadata +198 -0
@@ -0,0 +1,98 @@
1
+ require 'pdf-reader'
2
+ require 'open-uri'
3
+ require_relative 'uba-pdf-parser/transaction_tools'
4
+
5
+
6
module NgBankParser
  # Parses United Bank for Africa (UBA) PDF account statements.
  #
  # The whole API is the class-level `UbaPdf.parse`. Parsing state is stored in
  # instance variables of the class object itself, so overlapping calls (e.g.
  # from several threads) share that state.
  class UbaPdf
    extend TransactionTools

    # NOTE: keep these methods inside `class << self`. The bare constants used
    # below (ACCEPTED_FORMATS, the *_INDEX values, markers, ...) are not defined
    # in this class; they are presumably reached through the modules mixed in
    # by `extend TransactionTools` — confirm before converting to `def self.x`.
    class << self
      # Parses the statement at +url+.
      #
      # @param url [String] local path or remote URL; its extension must be
      #   listed in ACCEPTED_FORMATS
      # @param password [String, nil] accepted for interface compatibility;
      #   this parser does not use it
      # @return [Hash] `{ status: 1, data: {...} }` on success, or
      #   `{ status: 0, message: INVALID_FILE_STRING }` when the file is rejected
      def parse(url, password = nil)
        return invalid_file unless ACCEPTED_FORMATS.include?(File.extname(url))

        file = open_statement(url)
        begin
          @reader = PDF::Reader.new(file)
          set_up_first_page
          return invalid_file unless is_valid_pdf?

          set_account_details
          set_transactions
          return_payload
        ensure
          # All reading happens above, so the handle can be released here.
          file.close if file.respond_to?(:close)
        end
      end

      private

      # Opens the statement for reading.
      #
      # Local paths go through File.open (binary mode) instead of Kernel#open,
      # which would run a command for strings starting with "|". Remote URLs
      # use URI.open where available, because Kernel#open stopped handling
      # URLs in Ruby 3.0; older Rubies fall back to open-uri's Kernel#open.
      def open_statement(url)
        location = url.to_s
        return File.open(location, 'rb') unless location =~ %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}

        URI.respond_to?(:open) ? URI.open(location) : open(location)
      end

      # Caches the first page's text (empty lines removed) and derives the
      # column positions from its table header line.
      def set_up_first_page
        @first_page_text = @reader.pages.first.text.remove_empty_lines
        set_column_positions(@first_page_text.lines[TABLE_HEADER_LINE_INDEX])
      end

      def set_account_details
        set_bank_name
        set_account_name
        set_account_number
        set_dates
        # print_account_details
      end

      # Debugging aid: dumps the extracted account details to stdout.
      def print_account_details
        puts @bank_name
        puts @account_name
        puts @account_number
        puts @from_date
        puts @to_date
      end

      def set_bank_name
        @bank_name = "United Bank for Africa"
      end

      def set_account_name
        account_name_line = @first_page_text.lines[ACCOUNT_NAME_LINE_INDEX]
        @account_name = account_name_line.get_text_between_markers(ACCOUNT_NAME_START_MARKER, ACCOUNT_NAME_END_MARKER)
      end

      # Extracts the text between the account-number label and the currency
      # label. Non-bang `downcase`/`upcase` are required here: the bang
      # variants return nil when the string is unchanged, which turned every
      # all-digit account number into nil.
      def set_account_number
        line = @first_page_text.lines[ACCOUNT_NUMBER_LINE_INDEX].remove_white_spaces.downcase
        number = line.get_text_between_markers(ACCOUNT_NUMBER_STRING, CURRENCY_STRING)
        @account_number = number && number.upcase
      end

      # Reads the statement period from the second line of the first page.
      def set_dates
        line = @first_page_text.lines[1].remove_white_spaces.downcase
        @from_date = line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING).upcase.convert_to_date
        @to_date = line.get_text_after_marker(END_DATE_STRING).upcase.convert_to_date
      end

      # Success payload returned by `parse`.
      def return_payload
        {
          status: 1,
          data: {
            bank_name: @bank_name,
            account_number: @account_number,
            account_name: @account_name,
            from_date: @from_date,
            to_date: @to_date,
            transactions: @transactions_hashes_array
          }
        }
      end

      # Failure payload returned when the file is not an acceptable statement.
      def invalid_file
        { status: 0, message: INVALID_FILE_STRING }
      end
    end
  end
end
97
+
98
+
@@ -0,0 +1,56 @@
1
# Core-extension helpers used by the UBA PDF parser to slice and normalise
# lines of text extracted from a statement.
class String
  # Returns the slice between the column's :start and :end offsets (inclusive).
  #
  # @param column [Hash] hash with integer :start and :end keys
  # @return [String, nil] nil when :start lies beyond the end of the string
  def extract_column(column)
    self[column[:start]..column[:end]]
  end

  # Drops lines that are completely empty (nothing before the newline).
  def remove_empty_lines
    gsub(/^$\n/, '')
  end

  # Removes every newline character.
  # The original used '\n' in single quotes, which is a literal backslash
  # followed by "n" and therefore never matched a newline.
  def remove_multiple_lines
    gsub("\n", '')
  end

  # Removes all whitespace, including newlines.
  def remove_white_spaces
    gsub(/\s+/, '')
  end

  # Collapses each run of whitespace into a single space.
  def reduce_to_singular_white_space
    gsub(/\s+/, ' ')
  end

  # Removes thousands separators so the text can be converted with to_f.
  def remove_commas
    gsub(/,/, '')
  end

  # Returns the shortest text found between +marker1+ and +marker2+
  # (matching across newlines), or nil when the markers are not both present.
  def get_text_between_markers(marker1, marker2)
    self[/#{Regexp.escape(marker1)}(.*?)#{Regexp.escape(marker2)}/m, 1]
  end

  # Returns everything after the first occurrence of +marker+
  # ("" when the marker is absent).
  def get_text_after_marker(marker)
    partition(marker).last
  end

  # Returns the first ten characters, the width of a dd-mm-yyyy date.
  def get_date_string
    self[0...10]
  end

  # Returns the first line, including its trailing newline if any.
  def get_first_line
    lines[0]
  end

  # True when Date.parse accepts the string.
  def is_date?
    Date.parse(self)
    true
  rescue ArgumentError
    false
  end

  # Parses a date such as "01-JAN-2015".
  #
  # @raise [ArgumentError] when the string does not match '%d-%b-%Y'
  def convert_to_date
    Date.strptime(self, '%d-%b-%Y')
  end
end
@@ -0,0 +1,73 @@
1
+ require 'active_support/json/decoding.rb'
2
+ require 'active_support/json/encoding.rb'
3
+
4
+
5
# One statement row, built from the text of a transaction (its first line plus
# any continuation lines) and the column offsets of the statement table.
#
# Relies on the String helpers defined by this package (get_first_line,
# extract_column, remove_white_spaces, ...) and on ActiveSupport's `blank?`.
class Transaction
  TYPE = {
    debit: 'debit',
    credit: 'credit'
  }.freeze

  # @param string [String] transaction text; the first line carries the date
  #   and the amounts, later lines only add to the remarks
  # @param columns [Hash] must contain :debit_column, :credit_column,
  #   :balance_column and :remarks_column, each a hash with :start and :end
  def initialize(string, columns)
    set_date_from_transaction_string(string)
    set_amount_and_type_from_transaction_string(string, columns[:debit_column], columns[:credit_column])
    set_balance_from_transaction_string(string, columns[:balance_column])
    set_remarks_from_transaction_string(string, columns[:remarks_column])
    set_ref
  end

  # @return [Hash] the transaction as plain data
  def to_hash
    {
      date: @date,
      amount: @amount,
      type: @type,
      balance: @balance,
      remarks: @remarks,
      ref: @ref
    }
  end

  private

  # The date is the first ten characters of the first line, as dd-mm-yyyy.
  def set_date_from_transaction_string(string)
    @date = Date.strptime(string.get_first_line.get_date_string, '%d-%m-%Y')
  end

  # Exactly one of the debit/credit cells is expected to be filled. When both
  # are empty or both are filled, type and amount are left nil.
  def set_amount_and_type_from_transaction_string(string, debit_column, credit_column)
    first_line = string.get_first_line
    debit = first_line.extract_column(debit_column).remove_white_spaces
    credit = first_line.extract_column(credit_column).remove_white_spaces

    if debit.blank? && !credit.blank?
      @type = TYPE[:credit]
      @amount = credit.remove_commas.to_f
    elsif credit.blank? && !debit.blank?
      @type = TYPE[:debit]
      @amount = debit.remove_commas.to_f
    end
  end

  def set_balance_from_transaction_string(string, balance_column)
    @balance = string.get_first_line.extract_column(balance_column).remove_white_spaces.remove_commas.to_f
  end

  # Joins the remarks cell of every line, then collapses whitespace.
  # `to_s` covers lines that end before the remarks column starts, where
  # extract_column returns nil; previously that raised instead of being skipped.
  def set_remarks_from_transaction_string(string, remarks_column)
    fragments = string.lines.map { |line| line.extract_column(remarks_column).to_s }
    @remarks = fragments.join.reduce_to_singular_white_space
  end

  # The statement exposes no usable reference, so a placeholder is stored.
  def set_ref
    @ref = '0'
  end
end
73
+
@@ -0,0 +1,32 @@
1
module NgBankParser
  # Layout facts and marker strings the UBA PDF parser relies on.
  # Line indexes refer to the page text after empty lines have been removed.
  module Constants
    # File extensions `parse` accepts. Frozen so it cannot be altered at runtime.
    ACCEPTED_FORMATS = ['.pdf'].freeze

    # Positions of interesting lines on the first page.
    TABLE_HEADER_LINE_INDEX = 6
    LENGTH_OF_TRANSACTION_DATE_STRING = 10
    ACCOUNT_NAME_LINE_INDEX = 2
    ACCOUNT_NUMBER_LINE_INDEX = 5
    # Slice of a page's lines that is scanned for transactions.
    RANGE_OF_LINES_WITH_TRANSACTIONS = 7..-3

    # Labels looked up in header lines after whitespace removal and downcasing,
    # which is why they contain no spaces or capitals.
    FIRST_LINE_CHECKER_STRING = 'statementoftransactions'
    START_DATE_STRING = 'startdate:'
    END_DATE_STRING = 'enddate:'
    ACCOUNT_NUMBER_STRING = 'accountnumber:'
    CURRENCY_STRING = 'currency'
    # The account name is read from between these two markers.
    ACCOUNT_NAME_START_MARKER = '( '
    ACCOUNT_NAME_END_MARKER = ' )'

    # Column titles searched for in the downcased table header line.
    DATE_COLUMN_HEADER_STRING = 'date'
    REMARKS_COLUMN_HEADER_STRING = 'description'
    CHEQUE_NUMBER_COLUMN_HEADER_STRING = 'chq. no'
    VAL_DATE_COLUMN_HEADER_STRING = 'val date'
    DEBIT_COLUMN_HEADER_STRING = 'debit'
    CREDIT_COLUMN_HEADER_STRING = 'credit'
    BALANCE_COLUMN_HEADER_STRING = 'balance'

    # Drops the last character of an extracted header cell before comparison.
    COLUMN_OFFSET_RANGE = 0..-2
    # Message returned in the failure payload.
    INVALID_FILE_STRING = 'Invalid File'
  end
end
@@ -0,0 +1,58 @@
1
+ require_relative 'constants'
2
+
3
module NgBankParser
  # Structural checks that decide whether the first page of a PDF looks like a
  # UBA statement. Expects the including object to have set @first_page_text
  # and the @..._column hashes beforehand.
  #
  # All checks use non-bang `downcase`/`upcase`: the bang variants return nil
  # when the text is already in the target case, which made validation raise
  # NoMethodError instead of answering true or false.
  module PdfChecks
    include Constants

    private

    # @return [Boolean] true only when every header line and column title matches
    def is_valid_pdf?
      is_first_line_of_first_page_correct? &&
        is_date_range_line_correct? &&
        is_account_number_line_correct? &&
        are_columns_correct?
    end

    def is_first_line_of_first_page_correct?
      normalized_first_page_line(0).end_with?(FIRST_LINE_CHECKER_STRING)
    end

    # Both dates must be parseable. The end date is taken from the text after
    # END_DATE_STRING so that the value validated here is the same one that is
    # later converted into the statement's end date.
    def is_date_range_line_correct?
      date_line = normalized_first_page_line(1)
      start_date = date_line.get_text_between_markers(START_DATE_STRING, END_DATE_STRING)
      return false if start_date.nil?

      end_date = date_line.get_text_after_marker(END_DATE_STRING)
      start_date.upcase.is_date? && end_date.upcase.is_date?
    end

    def is_account_number_line_correct?
      normalized_first_page_line(ACCOUNT_NUMBER_LINE_INDEX).start_with?(ACCOUNT_NUMBER_STRING)
    end

    def are_columns_correct?
      is_date_column_correct? &&
        is_remarks_column_correct? &&
        is_debit_column_correct? &&
        is_credit_column_correct? &&
        is_balance_column_correct?
    end

    # The date and remarks cells include the first character of the next
    # column title, hence the COLUMN_OFFSET_RANGE trim before comparing.
    def is_date_column_correct?
      cell = header_cell(@date_column)
      !cell.nil? && cell[COLUMN_OFFSET_RANGE] == DATE_COLUMN_HEADER_STRING
    end

    def is_remarks_column_correct?
      cell = header_cell(@remarks_column)
      !cell.nil? && cell[COLUMN_OFFSET_RANGE] == REMARKS_COLUMN_HEADER_STRING
    end

    def is_debit_column_correct?
      header_cell(@debit_column) == DEBIT_COLUMN_HEADER_STRING
    end

    def is_credit_column_correct?
      header_cell(@credit_column) == CREDIT_COLUMN_HEADER_STRING
    end

    def is_balance_column_correct?
      header_cell(@balance_column) == BALANCE_COLUMN_HEADER_STRING
    end

    # Line +index+ of the first page with whitespace removed and downcased.
    # A missing line becomes "" so the caller's check simply fails.
    def normalized_first_page_line(index)
      @first_page_text.lines[index].to_s.remove_white_spaces.downcase
    end

    # Table-header text inside +column+, whitespace removed and downcased.
    # Returns nil when the column was never located, so the check fails
    # instead of raising.
    def header_cell(column)
      return nil unless column && column[:start] && column[:end]

      cell = @first_page_text.lines[TABLE_HEADER_LINE_INDEX].to_s.extract_column(column)
      cell && cell.remove_white_spaces.downcase
    end
  end
end
@@ -0,0 +1,110 @@
1
+ require 'date'
2
+ require 'active_support/core_ext/object/blank'
3
+ require_relative 'classes/string'
4
+ require_relative 'classes/transaction'
5
+ require_relative 'pdf_checks'
6
+
7
+
8
module NgBankParser
  # Turns the text of the statement pages into Transaction objects and works
  # out where each table column starts and ends.
  #
  # Expects the extending object to provide @reader (the PDF reader) and an
  # `invalid_file` method.
  module TransactionTools
    include PdfChecks

    private

    # Builds @transactions and @transactions_hashes_array from @reader.
    #
    # State is reset on every call: the extending object lives for the whole
    # process, so `||=` made a second parse append to the first one's results.
    def set_transactions
      @transaction_strings = []
      # NOTE(review): the last page is deliberately skipped here — presumably
      # it holds no transactions; confirm against a real statement.
      @reader.pages[0..-2].each do |page|
        seperate_lines_into_transaction_strings(page.text.remove_empty_lines.lines)
      end
      @transactions = @transaction_strings.map do |transaction_string|
        Transaction.new(transaction_string, @columns)
      end
      # NOTE(review): the first parsed row is discarded — presumably an
      # opening-balance row; confirm.
      @transactions.shift
      set_transactions_hashes_array
    end

    def set_transactions_hashes_array
      @transactions_hashes_array = @transactions.map(&:to_hash)
    end

    # Groups the lines of one page into transaction strings: each line that
    # starts with a date opens a transaction, and the following undated lines
    # are appended to it.
    #
    # @param lines [Array<String>] all non-empty lines of a page
    def seperate_lines_into_transaction_strings(lines)
      @transaction_strings ||= []
      # Pages with too few lines yield nil for this slice; treat as no rows.
      @lines = lines[RANGE_OF_LINES_WITH_TRANSACTIONS] || []
      @lines.each_with_index do |line, index|
        next unless line_has_date?(line)

        continuation = get_next_lines(index).take_while { |next_line| !line_has_date?(next_line) }
        @transaction_strings << ([line] + continuation).join
      end
    end

    def line_has_date?(line)
      line.get_date_string.is_date?
    end

    def get_next_lines(index)
      @lines[index + 1..-1]
    end

    # Locates every column from the table header line and stores the result
    # in the @..._column hashes and in @columns.
    #
    # @param column_header_line [String, nil] the table header line
    # @return [Hash] @columns on success, or `invalid_file` when a required
    #   title is missing (arithmetic on a nil index raises NoMethodError)
    def set_column_positions(column_header_line)
      # Forget positions from any earlier statement so a failure below cannot
      # leave stale columns behind.
      @date_column = @remarks_column = @cheque_column = @val_date_column = nil
      @debit_column = @credit_column = @balance_column = @columns = nil

      # Work on a copy; the caller's string is left untouched.
      header = column_header_line.downcase
      remarks_at = header.index(REMARKS_COLUMN_HEADER_STRING)
      cheque_at = header.index(CHEQUE_NUMBER_COLUMN_HEADER_STRING)
      val_date_at = header.index(VAL_DATE_COLUMN_HEADER_STRING)

      @date_column = { start: header.index(DATE_COLUMN_HEADER_STRING), end: remarks_at }
      @remarks_column = { start: remarks_at, end: cheque_at }
      @cheque_column = { start: cheque_at, end: val_date_at }
      @val_date_column = {
        start: val_date_at,
        end: val_date_at + LENGTH_OF_TRANSACTION_DATE_STRING
      }
      # The amount columns are anchored on where the value date ends and on
      # fixed offsets from their own titles (5 and 7 characters).
      @debit_column = {
        start: @val_date_column[:start] + LENGTH_OF_TRANSACTION_DATE_STRING,
        end: header.index(DEBIT_COLUMN_HEADER_STRING) + 5
      }
      @credit_column = {
        start: @debit_column[:end] + 1,
        end: header.index(CREDIT_COLUMN_HEADER_STRING) + 7
      }
      @balance_column = { start: header.index(BALANCE_COLUMN_HEADER_STRING), end: -1 }

      @columns = {
        date_column: @date_column,
        remarks_column: @remarks_column,
        cheque_column: @cheque_column,
        val_date_column: @val_date_column,
        debit_column: @debit_column,
        credit_column: @credit_column,
        balance_column: @balance_column
      }
    rescue NoMethodError
      invalid_file
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require 'httmultiparty'
2
+
3
module NgBankParser
  # Client for a remote service that removes the password from a PDF.
  #
  # NOTE(review): the base URI is plain http, so the file and its password
  # appear to travel unencrypted — confirm whether https can be used.
  class PDFUnlocker
    include HTTMultiParty
    base_uri 'http://pdf-unlocker.herokuapp.com'

    # @param file the PDF to unlock, passed to the service as the `pdf` field
    # @param password the password protecting the PDF
    def initialize(file, password)
      @pdf, @password = file, password
    end

    # Posts the PDF and password to the unlock endpoint.
    #
    # @return the parsed response body, or nil when the request raised any
    #   StandardError (best effort: failures are not reported to the caller)
    def unlocked_pdf
      form = { pdf: @pdf, password: @password }
      self.class.post('/rest/pdf/unlock', query: form, detect_mime_type: true).parsed_response
    rescue StandardError
      nil
    end
  end
end