banktools-se 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 74491115cff66b4bf05d2d842ebc5e244580c253
4
- data.tar.gz: 51f456d3da82641e22dc0c236c84b0fdf9b1b66e
3
+ metadata.gz: 1c5c3cd7492d627e0bd743ad56328b7d97daad3a
4
+ data.tar.gz: e83b11dbab0e65165fcbcf2ed490770624023ea9
5
5
  SHA512:
6
- metadata.gz: b28bb0e72521ab8641fa2545002a4a86e6add6856d526c6a98f681002265aa3a4b8f531959c330beba6552fc6138fbd45f598f8fb9023092d9474b91ce491669
7
- data.tar.gz: b61fb6be0c11bf1c784dec49d58a4ed199a9adfc244fe119aa2152e2e60c198fe70ca3d74a057bc6ac3985eff4fab3c5aac0a40b6681d8d2715a0a8a2ec1624a
6
+ metadata.gz: 173c075e3289cd074802e1b0530b2cfa4dcf11a087bec1bc6f1582252ab44c67d785ebf239e77e14930ec3db646d820098bded4fed99646e136b30fccfed7061
7
+ data.tar.gz: 67767a7305bb6425c53200c8e664e9635ce2162c319f70772bd3747a00a104589c874d78429a25769802468a1097f910ca77c533ee388b40cac2ce48cb249173
data/README.markdown CHANGED
@@ -45,8 +45,10 @@ Inspired by [iulianu/iban-tools](https://github.com/iulianu/iban-tools). Please
45
45
 
46
46
  # This feature is intended to try to find all OCR numbers in a noisy bank statement string.
47
47
  # By design it may find too many numbers (e.g. valid substrings of other numbers), so you should check results against actual outstanding invoices.
48
- BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 789") # => [ "1230", "4564" ]
48
+ # By default, it excludes OCRs shorter than 4 digits, but this limit can be changed with the min_length option, as shown below.
49
+ BankTools::SE::OCR.find_all_in_string("OCR1230 and ref4564 and 7890") # => [ "1230", "4564" ]
49
50
  BankTools::SE::OCR.find_all_in_string("1230 and 123067", length_digit: true, pad: "0") # => [ "123067" ]
51
+ BankTools::SE::OCR.find_all_in_string("00 and 18", min_length: 2) # => [ "00", "18" ]
50
52
 
51
53
  # Plusgiro
52
54
 
@@ -5,6 +5,7 @@ module BankTools
5
5
  class OCR
6
6
  class InvalidOCR < StandardError; end
7
7
  class OverlongOCR < InvalidOCR; end
8
+ class TooShortOCR < InvalidOCR; end
8
9
  class BadPadding < InvalidOCR; end
9
10
  class BadLengthDigit < InvalidOCR; end
10
11
  class BadChecksum < InvalidOCR; end
@@ -28,23 +29,24 @@ module BankTools
28
29
 
29
30
  length = number_with_ocr.length
30
31
  if length > MAX_LENGTH
31
- raise OverlongOCR, "Bankgiro OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
32
+ raise OverlongOCR, "OCR must be #{MIN_LENGTH} - #{MAX_LENGTH} characters (this one would be #{length} characters)"
32
33
  end
33
34
 
34
35
  number_with_ocr
35
36
  end
36
37
 
37
- def self.to_number(number, length_digit: false, pad: "")
38
- number = number.to_s
38
+ def self.to_number(ocr, length_digit: false, pad: "")
39
+ ocr = ocr.to_s
39
40
  should_have_length_digit = length_digit
40
41
  strip_padding = pad.to_s
41
42
 
42
- raise MustBeNumeric unless number.match(/\A\d+\z/)
43
- raise BadChecksum unless Utils.valid_luhn?(number)
43
+ raise MustBeNumeric unless ocr.match(/\A\d+\z/)
44
+ raise BadChecksum unless Utils.valid_luhn?(ocr)
45
+ raise TooShortOCR if ocr.length < MIN_LENGTH
44
46
 
45
47
  if should_have_length_digit
46
- length_digit = number[-2]
47
- last_digit_of_actual_length = number.length.to_s[-1]
48
+ length_digit = ocr[-2]
49
+ last_digit_of_actual_length = ocr.length.to_s[-1]
48
50
  raise BadLengthDigit if length_digit != last_digit_of_actual_length
49
51
  end
50
52
 
@@ -54,16 +56,23 @@ module BankTools
54
56
  if strip_padding.length > 0
55
57
  expected_padding_end = -digits_to_chop - 1
56
58
  expected_padding_start = expected_padding_end - strip_padding.length + 1
57
- raise BadPadding if number[expected_padding_start..expected_padding_end] != strip_padding
59
+ raise BadPadding if ocr[expected_padding_start..expected_padding_end] != strip_padding
58
60
  end
59
61
 
60
62
  digits_to_chop += strip_padding.length
61
63
 
62
- number[0...-digits_to_chop]
64
+ ocr[0...-digits_to_chop]
63
65
  end
64
66
 
65
- def self.find_all_in_string(string, length_digit: false, pad: "")
66
- string.scan(/\d+/).select { |candidate|
67
+ def self.find_all_in_string(string, length_digit: false, pad: "", min_length: 4)
68
+ expanded_string = string + " " + string.gsub("\n", "") + " " + string.gsub(";", "")
69
+
70
+ numbers = expanded_string.scan(/\d+/)
71
+
72
+ expanded_numbers = with_numbers_found_by_removing_prefix_and_postfix(numbers).
73
+ reject { |n| n.length < min_length }
74
+
75
+ expanded_numbers.select { |candidate|
67
76
  begin
68
77
  to_number(candidate, length_digit: length_digit, pad: pad)
69
78
  true
@@ -72,6 +81,20 @@ module BankTools
72
81
  end
73
82
  }.uniq
74
83
  end
84
+
85
+ private
86
+
87
+ private_class_method \
88
+ def self.with_numbers_found_by_removing_prefix_and_postfix(numbers)
89
+ numbers + numbers.flat_map { |number|
90
+ 0.upto(number.size).flat_map { |i|
91
+ [
92
+ number[0...i],
93
+ number[i...number.size],
94
+ ]
95
+ }
96
+ }
97
+ end
75
98
  end
76
99
  end
77
100
  end
@@ -1,5 +1,5 @@
1
1
  module BankTools
2
2
  module SE
3
- VERSION = "2.1.0"
3
+ VERSION = "2.3.0"
4
4
  end
5
5
  end
data/spec/ocr_spec.rb CHANGED
@@ -46,6 +46,11 @@ describe BankTools::SE::OCR do
46
46
  BankTools::SE::OCR.to_number("1234567890037", length_digit: true, pad: "0").should eq "1234567890"
47
47
  end
48
48
 
49
+ it "raises if the given number is too short to be a valid OCR" do
50
+ expect { BankTools::SE::OCR.to_number("0") }.to raise_error(BankTools::SE::OCR::TooShortOCR)
51
+ expect { BankTools::SE::OCR.to_number("00") }.not_to raise_error
52
+ end
53
+
49
54
  it "raises if checksum is wrong" do
50
55
  expect { BankTools::SE::OCR.to_number("1231") }.to raise_error(BankTools::SE::OCR::BadChecksum)
51
56
  end
@@ -72,7 +77,7 @@ describe BankTools::SE::OCR do
72
77
  expect(BankTools::SE::OCR.find_all_in_string("1230 1234 4564")).to eq [ "1230", "4564" ]
73
78
  end
74
79
 
75
- it "requires OCRs to comply with length_digit and pad options" do
80
+ it "requires OCRs to comply with the specified length_digit and pad options" do
76
81
  string = "1230 4564 123067 456061"
77
82
  expect(BankTools::SE::OCR.find_all_in_string(string)).to eq [ "1230", "4564", "123067", "456061" ]
78
83
  expect(BankTools::SE::OCR.find_all_in_string(string, length_digit: true, pad: "0")).to eq [ "123067", "456061" ]
@@ -82,6 +87,39 @@ describe BankTools::SE::OCR do
82
87
  expect(BankTools::SE::OCR.find_all_in_string("x1230x")).to eq [ "1230" ]
83
88
  end
84
89
 
90
+ it "handles OCR numbers both separated and split by newlines" do
91
+ expect(BankTools::SE::OCR.find_all_in_string("1230\n4564")).to include "1230", "4564", "12304564"
92
+ expect(BankTools::SE::OCR.find_all_in_string("45\n64")).to eq [ "4564" ]
93
+ end
94
+
95
+ it "handles OCR numbers both separated and split by semicolons" do
96
+ expect(BankTools::SE::OCR.find_all_in_string("1230;4564")).to include "1230", "4564", "12304564"
97
+ expect(BankTools::SE::OCR.find_all_in_string("45;64")).to eq [ "4564" ]
98
+ end
99
+
100
+ it "handles numbers smushed together" do
101
+ # "Ref 1: 1230" with characters gone missing.
102
+ expect(BankTools::SE::OCR.find_all_in_string("REF 11230")).to include "1230"
103
+
104
+ # Two OCRs without separation.
105
+ expect(BankTools::SE::OCR.find_all_in_string("12304564")).to include "1230", "4564"
106
+
107
+ # Amount smushed into OCR.
108
+ expect(BankTools::SE::OCR.find_all_in_string("EHRENKRONAAUFTR: EUR 17,183188720001 PAYMENT")).to include "3188720001"
109
+
110
+ # OCR smushed into item ID.
111
+ string = "Referenznummer 3201675000187604. HISTORISTISCHER SALONTISCH."
112
+ expect(BankTools::SE::OCR.find_all_in_string(string)).to include "3201675000"
113
+ end
114
+
115
+ it "lets you configure the accepted OCR min_length" do
116
+ expect(BankTools::SE::OCR.find_all_in_string("12304564")).to eq [ "12304564", "04564", "1230", "4564" ]
117
+ expect(BankTools::SE::OCR.find_all_in_string("12304564", min_length: 6)).to eq [ "12304564" ]
118
+
119
+ expect(BankTools::SE::OCR.find_all_in_string("1234")).to eq []
120
+ expect(BankTools::SE::OCR.find_all_in_string("1234", min_length: 2)).to eq [ "34" ]
121
+ end
122
+
85
123
  it "excludes duplicates" do
86
124
  expect(BankTools::SE::OCR.find_all_in_string("1230 1230 4564")).to eq [ "1230", "4564" ]
87
125
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: banktools-se
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Henrik Nyh