banktools-se 2.1.0 → 2.3.0
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.markdown +3 -1
- data/lib/banktools-se/ocr.rb +34 -11
- data/lib/banktools-se/version.rb +1 -1
- data/spec/ocr_spec.rb +39 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c5c3cd7492d627e0bd743ad56328b7d97daad3a
|
4
|
+
data.tar.gz: e83b11dbab0e65165fcbcf2ed490770624023ea9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 173c075e3289cd074802e1b0530b2cfa4dcf11a087bec1bc6f1582252ab44c67d785ebf239e77e14930ec3db646d820098bded4fed99646e136b30fccfed7061
|
7
|
+
data.tar.gz: 67767a7305bb6425c53200c8e664e9635ce2162c319f70772bd3747a00a104589c874d78429a25769802468a1097f910ca77c533ee388b40cac2ce48cb249173
|
data/README.markdown
CHANGED
@@ -45,8 +45,10 @@ Inspired by [iulianu/iban-tools](https://github.com/iulianu/iban-tools). Please
|
|
45
45
|
|
46
46
|
# This feature is intended to try to find all OCR numbers in a noisy bank statement string.
|
47
47
|
# By design it may find too many numbers (e.g. valid substrings of other numbers), so you should check results against actual outstanding invoices.
|
48
|
-
|
48
|
+
# By default, it excludes OCRs shorter than 4 digits, but this limit can be changed with the min_length option, as shown below.
|
49
|
+
BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 7890") # => [ "1230", "4564" ]
|
49
50
|
BankTools::SE::OCR.find_all_in_string("1230 and 123067", length_digit: true, pad: "0") # => [ "123067" ]
|
51
|
+
BankTools::SE::OCR.find_all_in_string("00 and 18", min_length: 2) # => [ "00", "18" ]
|
50
52
|
|
51
53
|
# Plusgiro
|
52
54
|
|
data/lib/banktools-se/ocr.rb
CHANGED
@@ -5,6 +5,7 @@ module BankTools
|
|
5
5
|
class OCR
|
6
6
|
class InvalidOCR < StandardError; end
|
7
7
|
class OverlongOCR < InvalidOCR; end
|
8
|
+
class TooShortOCR < InvalidOCR; end
|
8
9
|
class BadPadding < InvalidOCR; end
|
9
10
|
class BadLengthDigit < InvalidOCR; end
|
10
11
|
class BadChecksum < InvalidOCR; end
|
@@ -28,23 +29,24 @@ module BankTools
|
|
28
29
|
|
29
30
|
length = number_with_ocr.length
|
30
31
|
if length > MAX_LENGTH
|
31
|
-
raise OverlongOCR, "
|
32
|
+
raise OverlongOCR, "OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
|
32
33
|
end
|
33
34
|
|
34
35
|
number_with_ocr
|
35
36
|
end
|
36
37
|
|
37
|
-
def self.to_number(
|
38
|
-
|
38
|
+
def self.to_number(ocr, length_digit: false, pad: "")
|
39
|
+
ocr = ocr.to_s
|
39
40
|
should_have_length_digit = length_digit
|
40
41
|
strip_padding = pad.to_s
|
41
42
|
|
42
|
-
raise MustBeNumeric unless
|
43
|
-
raise BadChecksum unless Utils.valid_luhn?(
|
43
|
+
raise MustBeNumeric unless ocr.match(/\A\d+\z/)
|
44
|
+
raise BadChecksum unless Utils.valid_luhn?(ocr)
|
45
|
+
raise TooShortOCR if ocr.length < MIN_LENGTH
|
44
46
|
|
45
47
|
if should_have_length_digit
|
46
|
-
length_digit =
|
47
|
-
last_digit_of_actual_length =
|
48
|
+
length_digit = ocr[-2]
|
49
|
+
last_digit_of_actual_length = ocr.length.to_s[-1]
|
48
50
|
raise BadLengthDigit if length_digit != last_digit_of_actual_length
|
49
51
|
end
|
50
52
|
|
@@ -54,16 +56,23 @@ module BankTools
|
|
54
56
|
if strip_padding.length > 0
|
55
57
|
expected_padding_end = -digits_to_chop - 1
|
56
58
|
expected_padding_start = expected_padding_end - strip_padding.length + 1
|
57
|
-
raise BadPadding if
|
59
|
+
raise BadPadding if ocr[expected_padding_start..expected_padding_end] != strip_padding
|
58
60
|
end
|
59
61
|
|
60
62
|
digits_to_chop += strip_padding.length
|
61
63
|
|
62
|
-
|
64
|
+
ocr[0...-digits_to_chop]
|
63
65
|
end
|
64
66
|
|
65
|
-
def self.find_all_in_string(string, length_digit: false, pad: "")
|
66
|
-
string.
|
67
|
+
def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4)
|
68
|
+
expanded_string = string + " " + string.gsub("\n", "") + " " + string.gsub(";", "")
|
69
|
+
|
70
|
+
numbers = expanded_string.scan(/\d+/)
|
71
|
+
|
72
|
+
expanded_numbers = with_numbers_found_by_removing_prefix_and_postfix(numbers).
|
73
|
+
reject { |n| n.length < min_length }
|
74
|
+
|
75
|
+
expanded_numbers.select { |candidate|
|
67
76
|
begin
|
68
77
|
to_number(candidate, length_digit: length_digit, pad: pad)
|
69
78
|
true
|
@@ -72,6 +81,20 @@ module BankTools
|
|
72
81
|
end
|
73
82
|
}.uniq
|
74
83
|
end
|
84
|
+
|
85
|
+
private
|
86
|
+
|
87
|
+
private_class_method \
|
88
|
+
def self.with_numbers_found_by_removing_prefix_and_postfix(numbers)
|
89
|
+
numbers + numbers.flat_map { |number|
|
90
|
+
0.upto(number.size).flat_map { |i|
|
91
|
+
[
|
92
|
+
number[0...i],
|
93
|
+
number[i...number.size],
|
94
|
+
]
|
95
|
+
}
|
96
|
+
}
|
97
|
+
end
|
75
98
|
end
|
76
99
|
end
|
77
100
|
end
|
data/lib/banktools-se/version.rb
CHANGED
data/spec/ocr_spec.rb
CHANGED
@@ -46,6 +46,11 @@ describe BankTools::SE::OCR do
|
|
46
46
|
BankTools::SE::OCR.to_number("1234567890037", length_digit: true, pad: "0").should eq "1234567890"
|
47
47
|
end
|
48
48
|
|
49
|
+
it "raises if the given number is too short to be a valid OCR" do
|
50
|
+
expect { BankTools::SE::OCR.to_number("0") }.to raise_error(BankTools::SE::OCR::TooShortOCR)
|
51
|
+
expect { BankTools::SE::OCR.to_number("00") }.not_to raise_error
|
52
|
+
end
|
53
|
+
|
49
54
|
it "raises if checksum is wrong" do
|
50
55
|
expect { BankTools::SE::OCR.to_number("1231") }.to raise_error(BankTools::SE::OCR::BadChecksum)
|
51
56
|
end
|
@@ -72,7 +77,7 @@ describe BankTools::SE::OCR do
|
|
72
77
|
expect(BankTools::SE::OCR.find_all_in_string("1230 1234 4564")).to eq [ "1230", "4564" ]
|
73
78
|
end
|
74
79
|
|
75
|
-
it "requires OCRs to comply with length_digit and pad options" do
|
80
|
+
it "requires OCRs to comply with the specified length_digit and pad options" do
|
76
81
|
string = "1230 4564 123067 456061"
|
77
82
|
expect(BankTools::SE::OCR.find_all_in_string(string)).to eq [ "1230", "4564", "123067", "456061" ]
|
78
83
|
expect(BankTools::SE::OCR.find_all_in_string(string, length_digit: true, pad: "0")).to eq [ "123067", "456061" ]
|
@@ -82,6 +87,39 @@ describe BankTools::SE::OCR do
|
|
82
87
|
expect(BankTools::SE::OCR.find_all_in_string("x1230x")).to eq [ "1230" ]
|
83
88
|
end
|
84
89
|
|
90
|
+
it "handles OCR numbers both separated and split by newlines" do
|
91
|
+
expect(BankTools::SE::OCR.find_all_in_string("1230\n4564")).to include "1230", "4564", "12304564"
|
92
|
+
expect(BankTools::SE::OCR.find_all_in_string("45\n64")).to eq [ "4564" ]
|
93
|
+
end
|
94
|
+
|
95
|
+
it "handles OCR numbers both separated and split by semicolons" do
|
96
|
+
expect(BankTools::SE::OCR.find_all_in_string("1230;4564")).to include "1230", "4564", "12304564"
|
97
|
+
expect(BankTools::SE::OCR.find_all_in_string("45;64")).to eq [ "4564" ]
|
98
|
+
end
|
99
|
+
|
100
|
+
it "handles numbers smushed together" do
|
101
|
+
# "Ref 1: 1230" with characters gone missing.
|
102
|
+
expect(BankTools::SE::OCR.find_all_in_string("REF 11230")).to include "1230"
|
103
|
+
|
104
|
+
# Two OCRs without separation.
|
105
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564")).to include "1230", "4564"
|
106
|
+
|
107
|
+
# Amount smushed into OCR.
|
108
|
+
expect(BankTools::SE::OCR.find_all_in_string("EHRENKRONAAUFTR: EUR 17,183188720001 PAYMENT")).to include "3188720001"
|
109
|
+
|
110
|
+
# OCR smushed into item ID.
|
111
|
+
string = "Referenznummer 3201675000187604. HISTORISTISCHER SALONTISCH."
|
112
|
+
expect(BankTools::SE::OCR.find_all_in_string(string)).to include "3201675000"
|
113
|
+
end
|
114
|
+
|
115
|
+
it "lets you configure the accepted OCR min_length" do
|
116
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564")).to eq [ "12304564", "04564", "1230", "4564" ]
|
117
|
+
expect(BankTools::SE::OCR.find_all_in_string("12304564", min_length: 6)).to eq [ "12304564" ]
|
118
|
+
|
119
|
+
expect(BankTools::SE::OCR.find_all_in_string("1234")).to eq []
|
120
|
+
expect(BankTools::SE::OCR.find_all_in_string("1234", min_length: 2)).to eq [ "34" ]
|
121
|
+
end
|
122
|
+
|
85
123
|
it "excludes duplicates" do
|
86
124
|
expect(BankTools::SE::OCR.find_all_in_string("1230 1230 4564")).to eq [ "1230", "4564" ]
|
87
125
|
end
|