banktools-se 2.1.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 74491115cff66b4bf05d2d842ebc5e244580c253
4
- data.tar.gz: 51f456d3da82641e22dc0c236c84b0fdf9b1b66e
3
+ metadata.gz: 1c5c3cd7492d627e0bd743ad56328b7d97daad3a
4
+ data.tar.gz: e83b11dbab0e65165fcbcf2ed490770624023ea9
5
5
  SHA512:
6
- metadata.gz: b28bb0e72521ab8641fa2545002a4a86e6add6856d526c6a98f681002265aa3a4b8f531959c330beba6552fc6138fbd45f598f8fb9023092d9474b91ce491669
7
- data.tar.gz: b61fb6be0c11bf1c784dec49d58a4ed199a9adfc244fe119aa2152e2e60c198fe70ca3d74a057bc6ac3985eff4fab3c5aac0a40b6681d8d2715a0a8a2ec1624a
6
+ metadata.gz: 173c075e3289cd074802e1b0530b2cfa4dcf11a087bec1bc6f1582252ab44c67d785ebf239e77e14930ec3db646d820098bded4fed99646e136b30fccfed7061
7
+ data.tar.gz: 67767a7305bb6425c53200c8e664e9635ce2162c319f70772bd3747a00a104589c874d78429a25769802468a1097f910ca77c533ee388b40cac2ce48cb249173
data/README.markdown CHANGED
@@ -45,8 +45,10 @@ Inspired by [iulianu/iban-tools](https://github.com/iulianu/iban-tools). Please
45
45
 
46
46
  # This feature is intended to try to find all OCR numbers in a noisy bank statement string.
47
47
  # By design it may find too many numbers (e.g. valid substrings of other numbers), so you should check results against actual outstanding invoices.
48
- BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 789") # => [ "1230", "4564" ]
48
+ # By default, it excludes OCRs shorter than 4 digits, but this limit can be changed with the min_length option, as shown below.
49
+ BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 7890") # => [ "1230", "4564" ]
49
50
  BankTools::SE::OCR.find_all_in_string("1230 and 123067", length_digit: true, pad: "0") # => [ "123067" ]
51
+ BankTools::SE::OCR.find_all_in_string("00 and 18", min_length: 2) # => [ "00", "18" ]
50
52
 
51
53
  # Plusgiro
52
54
 
@@ -5,6 +5,7 @@ module BankTools
5
5
  class OCR
6
6
  class InvalidOCR < StandardError; end
7
7
  class OverlongOCR < InvalidOCR; end
8
+ class TooShortOCR < InvalidOCR; end
8
9
  class BadPadding < InvalidOCR; end
9
10
  class BadLengthDigit < InvalidOCR; end
10
11
  class BadChecksum < InvalidOCR; end
@@ -28,23 +29,24 @@ module BankTools
28
29
 
29
30
  length = number_with_ocr.length
30
31
  if length > MAX_LENGTH
31
- raise OverlongOCR, "Bankgiro OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
32
+ raise OverlongOCR, "OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
32
33
  end
33
34
 
34
35
  number_with_ocr
35
36
  end
36
37
 
37
- def self.to_number(number, length_digit: false, pad: "")
38
- number = number.to_s
38
+ def self.to_number(ocr, length_digit: false, pad: "")
39
+ ocr = ocr.to_s
39
40
  should_have_length_digit = length_digit
40
41
  strip_padding = pad.to_s
41
42
 
42
- raise MustBeNumeric unless number.match(/\A\d+\z/)
43
- raise BadChecksum unless Utils.valid_luhn?(number)
43
+ raise MustBeNumeric unless ocr.match(/\A\d+\z/)
44
+ raise BadChecksum unless Utils.valid_luhn?(ocr)
45
+ raise TooShortOCR if ocr.length < MIN_LENGTH
44
46
 
45
47
  if should_have_length_digit
46
- length_digit = number[-2]
47
- last_digit_of_actual_length = number.length.to_s[-1]
48
+ length_digit = ocr[-2]
49
+ last_digit_of_actual_length = ocr.length.to_s[-1]
48
50
  raise BadLengthDigit if length_digit != last_digit_of_actual_length
49
51
  end
50
52
 
@@ -54,16 +56,23 @@ module BankTools
54
56
  if strip_padding.length > 0
55
57
  expected_padding_end = -digits_to_chop - 1
56
58
  expected_padding_start = expected_padding_end - strip_padding.length + 1
57
- raise BadPadding if number[expected_padding_start..expected_padding_end] != strip_padding
59
+ raise BadPadding if ocr[expected_padding_start..expected_padding_end] != strip_padding
58
60
  end
59
61
 
60
62
  digits_to_chop += strip_padding.length
61
63
 
62
- number[0...-digits_to_chop]
64
+ ocr[0...-digits_to_chop]
63
65
  end
64
66
 
65
- def self.find_all_in_string(string, length_digit: false, pad: "")
66
- string.scan(/\d+/).select { |candidate|
67
+ def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4)
68
+ expanded_string = string + " " + string.gsub("\n", "") + " " + string.gsub(";", "")
69
+
70
+ numbers = expanded_string.scan(/\d+/)
71
+
72
+ expanded_numbers = with_numbers_found_by_removing_prefix_and_postfix(numbers).
73
+ reject { |n| n.length < min_length }
74
+
75
+ expanded_numbers.select { |candidate|
67
76
  begin
68
77
  to_number(candidate, length_digit: length_digit, pad: pad)
69
78
  true
@@ -72,6 +81,20 @@ module BankTools
72
81
  end
73
82
  }.uniq
74
83
  end
84
+
85
+ private
86
+
87
+ private_class_method \
88
+ def self.with_numbers_found_by_removing_prefix_and_postfix(numbers)
89
+ numbers + numbers.flat_map { |number|
90
+ 0.upto(number.size).flat_map { |i|
91
+ [
92
+ number[0...i],
93
+ number[i...number.size],
94
+ ]
95
+ }
96
+ }
97
+ end
75
98
  end
76
99
  end
77
100
  end
@@ -1,5 +1,5 @@
1
1
  module BankTools
2
2
  module SE
3
- VERSION = "2.1.0"
3
+ VERSION = "2.3.0"
4
4
  end
5
5
  end
data/spec/ocr_spec.rb CHANGED
@@ -46,6 +46,11 @@ describe BankTools::SE::OCR do
46
46
  BankTools::SE::OCR.to_number("1234567890037", length_digit: true, pad: "0").should eq "1234567890"
47
47
  end
48
48
 
49
+ it "raises if the given number is too short to be a valid OCR" do
50
+ expect { BankTools::SE::OCR.to_number("0") }.to raise_error(BankTools::SE::OCR::TooShortOCR)
51
+ expect { BankTools::SE::OCR.to_number("00") }.not_to raise_error
52
+ end
53
+
49
54
  it "raises if checksum is wrong" do
50
55
  expect { BankTools::SE::OCR.to_number("1231") }.to raise_error(BankTools::SE::OCR::BadChecksum)
51
56
  end
@@ -72,7 +77,7 @@ describe BankTools::SE::OCR do
72
77
  expect(BankTools::SE::OCR.find_all_in_string("1230 1234 4564")).to eq [ "1230", "4564" ]
73
78
  end
74
79
 
75
- it "requires OCRs to comply with length_digit and pad options" do
80
+ it "requires OCRs to comply with the specified length_digit and pad options" do
76
81
  string = "1230 4564 123067 456061"
77
82
  expect(BankTools::SE::OCR.find_all_in_string(string)).to eq [ "1230", "4564", "123067", "456061" ]
78
83
  expect(BankTools::SE::OCR.find_all_in_string(string, length_digit: true, pad: "0")).to eq [ "123067", "456061" ]
@@ -82,6 +87,39 @@ describe BankTools::SE::OCR do
82
87
  expect(BankTools::SE::OCR.find_all_in_string("x1230x")).to eq [ "1230" ]
83
88
  end
84
89
 
90
+ it "handles OCR numbers both separated and split by newlines" do
91
+ expect(BankTools::SE::OCR.find_all_in_string("1230\n4564")).to include "1230", "4564", "12304564"
92
+ expect(BankTools::SE::OCR.find_all_in_string("45\n64")).to eq [ "4564" ]
93
+ end
94
+
95
+ it "handles OCR numbers both separated and split by semicolons" do
96
+ expect(BankTools::SE::OCR.find_all_in_string("1230;4564")).to include "1230", "4564", "12304564"
97
+ expect(BankTools::SE::OCR.find_all_in_string("45;64")).to eq [ "4564" ]
98
+ end
99
+
100
+ it "handles numbers smushed together" do
101
+ # "Ref 1: 1230" with characters gone missing.
102
+ expect(BankTools::SE::OCR.find_all_in_string("REF 11230")).to include "1230"
103
+
104
+ # Two OCRs without separation.
105
+ expect(BankTools::SE::OCR.find_all_in_string("12304564")).to include "1230", "4564"
106
+
107
+ # Amount smushed into OCR.
108
+ expect(BankTools::SE::OCR.find_all_in_string("EHRENKRONAAUFTR: EUR 17,183188720001 PAYMENT")).to include "3188720001"
109
+
110
+ # OCR smushed into item ID.
111
+ string = "Referenznummer 3201675000187604. HISTORISTISCHER SALONTISCH."
112
+ expect(BankTools::SE::OCR.find_all_in_string(string)).to include "3201675000"
113
+ end
114
+
115
+ it "lets you configure the accepted OCR min_length" do
116
+ expect(BankTools::SE::OCR.find_all_in_string("12304564")).to eq [ "12304564", "04564", "1230", "4564" ]
117
+ expect(BankTools::SE::OCR.find_all_in_string("12304564", min_length: 6)).to eq [ "12304564" ]
118
+
119
+ expect(BankTools::SE::OCR.find_all_in_string("1234")).to eq []
120
+ expect(BankTools::SE::OCR.find_all_in_string("1234", min_length: 2)).to eq [ "34" ]
121
+ end
122
+
85
123
  it "excludes duplicates" do
86
124
  expect(BankTools::SE::OCR.find_all_in_string("1230 1230 4564")).to eq [ "1230", "4564" ]
87
125
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: banktools-se
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrik Nyh