banktools-se 2.1.0 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.markdown +3 -1
- data/lib/banktools-se/ocr.rb +34 -11
- data/lib/banktools-se/version.rb +1 -1
- data/spec/ocr_spec.rb +39 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c5c3cd7492d627e0bd743ad56328b7d97daad3a
|
4
|
+
data.tar.gz: e83b11dbab0e65165fcbcf2ed490770624023ea9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 173c075e3289cd074802e1b0530b2cfa4dcf11a087bec1bc6f1582252ab44c67d785ebf239e77e14930ec3db646d820098bded4fed99646e136b30fccfed7061
|
7
|
+
data.tar.gz: 67767a7305bb6425c53200c8e664e9635ce2162c319f70772bd3747a00a104589c874d78429a25769802468a1097f910ca77c533ee388b40cac2ce48cb249173
|
data/README.markdown
CHANGED
@@ -45,8 +45,10 @@ Inspired by [iulianu/iban-tools](https://github.com/iulianu/iban-tools). Please
|
|
45
45
|
|
46
46
|
# This feature is intended to try to find all OCR numbers in a noisy bank statement string.
|
47
47
|
# By design it may find too many numbers (e.g. valid substrings of other numbers), so you should check results against actual outstanding invoices.
|
48
|
-
|
48
|
+
# By default, it excludes OCRs shorter than 4 digits, but this limit can be changed with the min_length option, as shown below.
|
49
|
+
BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 7890") # => [ "1230", "4564" ]
|
49
50
|
BankTools::SE::OCR.find_all_in_string("1230 and 123067", length_digit: true, pad: "0") # => [ "123067" ]
|
51
|
+
BankTools::SE::OCR.find_all_in_string("00 and 18", min_length: 2) # => [ "00", "18" ]
|
50
52
|
|
51
53
|
# Plusgiro
|
52
54
|
|
data/lib/banktools-se/ocr.rb
CHANGED
@@ -5,6 +5,7 @@ module BankTools
|
|
5
5
|
class OCR
|
6
6
|
class InvalidOCR < StandardError; end
|
7
7
|
class OverlongOCR < InvalidOCR; end
|
8
|
+
class TooShortOCR < InvalidOCR; end
|
8
9
|
class BadPadding < InvalidOCR; end
|
9
10
|
class BadLengthDigit < InvalidOCR; end
|
10
11
|
class BadChecksum < InvalidOCR; end
|
@@ -28,23 +29,24 @@ module BankTools
|
|
28
29
|
|
29
30
|
length = number_with_ocr.length
|
30
31
|
if length > MAX_LENGTH
|
31
|
-
raise OverlongOCR, "
|
32
|
+
raise OverlongOCR, "OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
|
32
33
|
end
|
33
34
|
|
34
35
|
number_with_ocr
|
35
36
|
end
|
36
37
|
|
37
|
-
def self.to_number(
|
38
|
-
|
38
|
+
def self.to_number(ocr, length_digit: false, pad: "")
|
39
|
+
ocr = ocr.to_s
|
39
40
|
should_have_length_digit = length_digit
|
40
41
|
strip_padding = pad.to_s
|
41
42
|
|
42
|
-
raise MustBeNumeric unless
|
43
|
-
raise BadChecksum unless Utils.valid_luhn?(
|
43
|
+
raise MustBeNumeric unless ocr.match(/\A\d+\z/)
|
44
|
+
raise BadChecksum unless Utils.valid_luhn?(ocr)
|
45
|
+
raise TooShortOCR if ocr.length < MIN_LENGTH
|
44
46
|
|
45
47
|
if should_have_length_digit
|
46
|
-
length_digit =
|
47
|
-
last_digit_of_actual_length =
|
48
|
+
length_digit = ocr[-2]
|
49
|
+
last_digit_of_actual_length = ocr.length.to_s[-1]
|
48
50
|
raise BadLengthDigit if length_digit != last_digit_of_actual_length
|
49
51
|
end
|
50
52
|
|
@@ -54,16 +56,23 @@ module BankTools
|
|
54
56
|
if strip_padding.length > 0
|
55
57
|
expected_padding_end = -digits_to_chop - 1
|
56
58
|
expected_padding_start = expected_padding_end - strip_padding.length + 1
|
57
|
-
raise BadPadding if
|
59
|
+
raise BadPadding if ocr[expected_padding_start..expected_padding_end] != strip_padding
|
58
60
|
end
|
59
61
|
|
60
62
|
digits_to_chop += strip_padding.length
|
61
63
|
|
62
|
-
|
64
|
+
ocr[0...-digits_to_chop]
|
63
65
|
end
|
64
66
|
|
65
|
-
def self.find_all_in_string(string, length_digit: false, pad: "")
|
66
|
-
string.
|
67
|
+
def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4)
|
68
|
+
expanded_string = string + " " + string.gsub("\n", "") + " " + string.gsub(";", "")
|
69
|
+
|
70
|
+
numbers = expanded_string.scan(/\d+/)
|
71
|
+
|
72
|
+
expanded_numbers = with_numbers_found_by_removing_prefix_and_postfix(numbers).
|
73
|
+
reject { |n| n.length < min_length }
|
74
|
+
|
75
|
+
expanded_numbers.select { |candidate|
|
67
76
|
begin
|
68
77
|
to_number(candidate, length_digit: length_digit, pad: pad)
|
69
78
|
true
|
@@ -72,6 +81,20 @@ module BankTools
|
|
72
81
|
end
|
73
82
|
}.uniq
|
74
83
|
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
private_class_method \
|
88
|
+
def self.with_numbers_found_by_removing_prefix_and_postfix(numbers)
|
89
|
+
numbers + numbers.flat_map { |number|
|
90
|
+
0.upto(number.size).flat_map { |i|
|
91
|
+
[
|
92
|
+
number[0...i],
|
93
|
+
number[i...number.size],
|
94
|
+
]
|
95
|
+
}
|
96
|
+
}
|
97
|
+
end
|
75
98
|
end
|
76
99
|
end
|
77
100
|
end
|
data/lib/banktools-se/version.rb
CHANGED
data/spec/ocr_spec.rb
CHANGED
@@ -46,6 +46,11 @@ describe BankTools::SE::OCR do
|
|
46
46
|
BankTools::SE::OCR.to_number("1234567890037", length_digit: true, pad: "0").should eq "1234567890"
|
47
47
|
end
|
48
48
|
|
49
|
+
it "raises if the given number is too short to be a valid OCR" do
|
50
|
+
expect { BankTools::SE::OCR.to_number("0") }.to raise_error(BankTools::SE::OCR::TooShortOCR)
|
51
|
+
expect { BankTools::SE::OCR.to_number("00") }.not_to raise_error
|
52
|
+
end
|
53
|
+
|
49
54
|
it "raises if checksum is wrong" do
|
50
55
|
expect { BankTools::SE::OCR.to_number("1231") }.to raise_error(BankTools::SE::OCR::BadChecksum)
|
51
56
|
end
|
@@ -72,7 +77,7 @@ describe BankTools::SE::OCR do
|
|
72
77
|
expect(BankTools::SE::OCR.find_all_in_string("1230 1234 4564")).to eq [ "1230", "4564" ]
|
73
78
|
end
|
74
79
|
|
75
|
-
it "requires OCRs to comply with length_digit and pad options" do
|
80
|
+
it "requires OCRs to comply with the specified length_digit and pad options" do
|
76
81
|
string = "1230 4564 123067 456061"
|
77
82
|
expect(BankTools::SE::OCR.find_all_in_string(string)).to eq [ "1230", "4564", "123067", "456061" ]
|
78
83
|
expect(BankTools::SE::OCR.find_all_in_string(string, length_digit: true, pad: "0")).to eq [ "123067", "456061" ]
|
@@ -82,6 +87,39 @@ describe BankTools::SE::OCR do
|
|
82
87
|
expect(BankTools::SE::OCR.find_all_in_string("x1230x")).to eq [ "1230" ]
|
83
88
|
end
|
84
89
|
|
90
|
+
it "handles OCR numbers both separated and split by newlines" do
|
91
|
+
expect(BankTools::SE::OCR.find_all_in_string("1230\n4564")).to include "1230", "4564", "12304564"
|
92
|
+
expect(BankTools::SE::OCR.find_all_in_string("45\n64")).to eq [ "4564" ]
|
93
|
+
end
|
94
|
+
|
95
|
+
it "handles OCR numbers both separated and split by semicolons" do
|
96
|
+
expect(BankTools::SE::OCR.find_all_in_string("1230;4564")).to include "1230", "4564", "12304564"
|
97
|
+
expect(BankTools::SE::OCR.find_all_in_string("45;64")).to eq [ "4564" ]
|
98
|
+
end
|
99
|
+
|
100
|
+
it "handles numbers smushed together" do
|
101
|
+
# "Ref 1: 1230" with characters gone missing.
|
102
|
+
expect(BankTools::SE::OCR.find_all_in_string("REF 11230")).to include "1230"
|
103
|
+
|
104
|
+
# Two OCRs without separation.
|
105
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564")).to include "1230", "4564"
|
106
|
+
|
107
|
+
# Amount smushed into OCR.
|
108
|
+
expect(BankTools::SE::OCR.find_all_in_string("EHRENKRONAAUFTR: EUR 17,183188720001 PAYMENT")).to include "3188720001"
|
109
|
+
|
110
|
+
# OCR smushed into item ID.
|
111
|
+
string = "Referenznummer 3201675000187604. HISTORISTISCHER SALONTISCH."
|
112
|
+
expect(BankTools::SE::OCR.find_all_in_string(string)).to include "3201675000"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "lets you configure the accepted OCR min_length" do
|
116
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564")).to eq [ "12304564", "04564", "1230", "4564" ]
|
117
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564", min_length: 6)).to eq [ "12304564" ]
|
118
|
+
|
119
|
+
expect(BankTools::SE::OCR.find_all_in_string("1234")).to eq []
|
120
|
+
expect(BankTools::SE::OCR.find_all_in_string("1234", min_length: 2)).to eq [ "34" ]
|
121
|
+
end
|
122
|
+
|
85
123
|
it "excludes duplicates" do
|
86
124
|
expect(BankTools::SE::OCR.find_all_in_string("1230 1230 4564")).to eq [ "1230", "4564" ]
|
87
125
|
end
|