extras_de_cont 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -4
- data/extras_de_cont.gemspec +2 -1
- data/lib/extras_de_cont/rules/brd.rb +0 -1
- data/lib/extras_de_cont/rules/ing.rb +173 -0
- data/lib/extras_de_cont/rules/revolut.rb +0 -1
- data/lib/extras_de_cont/rules/unicredit.rb +0 -1
- data/lib/extras_de_cont.rb +16 -9
- metadata +16 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f9393dd1d6d9cec4a77e34393af8a0ccb3459c0486835f7df0eed74410cff435
|
|
4
|
+
data.tar.gz: f5008d24c8f24b1f2b7a0d0bda8a675a2c886fba09e86b9d91bb888c7a1d7b92
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 07e61564409929f0c316b834ac18ad00d54a4a0a9f6b1f221c012e3064f5ad046de1324781545455f6417fb4198450b49773613e1d59b936a8410e25286238e3
|
|
7
|
+
data.tar.gz: 64cce605262cdcb5a31aa164b76e6f108f8b24149f136c92d237b888dcd894813cda0fce0bf8e801862e162ecfbd88e095cc479c61661f2bab9054bb822b08a0
|
data/README.md
CHANGED
|
@@ -38,11 +38,12 @@ ruby -Ilib:test test/revolut_rule_test.rb
|
|
|
38
38
|
|
|
39
39
|
## Supported Banks
|
|
40
40
|
|
|
41
|
-
| Bank
|
|
42
|
-
|
|
43
|
-
| Revolut
|
|
41
|
+
| Bank | Symbol | Currencies | Features |
|
|
42
|
+
|-----------|--------------|---|--------------------------------------------------------|
|
|
43
|
+
| Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
|
|
44
44
|
| UniCredit | `:unicredit` | RON, EUR | Romanian month names, page breaks, transaction markers |
|
|
45
|
-
| BRD
|
|
45
|
+
| BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
|
|
46
|
+
| ING | `:ing` | RON | Normal bank statements in RON |
|
|
46
47
|
|
|
47
48
|
## Development
|
|
48
49
|
|
data/extras_de_cont.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "extras_de_cont"
|
|
5
|
-
s.version = "1.2.0"
|
|
5
|
+
s.version = "1.4.0"
|
|
6
6
|
s.licenses = ["GPLv3"]
|
|
7
7
|
s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
|
|
8
8
|
s.description = <<~TEXT
|
|
@@ -25,4 +25,5 @@ Gem::Specification.new do |s|
|
|
|
25
25
|
s.require_paths = ["lib"]
|
|
26
26
|
|
|
27
27
|
s.add_dependency "pdf-reader", "~> 2.15"
|
|
28
|
+
s.add_dependency "zeitwerk", "~> 2.8"
|
|
28
29
|
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
|
|
5
|
+
module ExtrasDeCont
  module Rules
    # Extraction rule for ING bank statements.
    #
    # The statement text is processed line by line. A "Moneda:" line sets the
    # currency, a table header line ("Data / Detalii tranzactie / Debit /
    # Credit") opens a transaction table, and every line that starts with a
    # Romanian long-form date ("12 ianuarie 2024 ...") starts a new
    # transaction. Non-date lines inside a table are collected as extra
    # description lines for the surrounding transaction.
    class Ing < Rules::Base
      # Romanian month name (lowercase) => month number.
      ROMANIAN_MONTHS = {
        "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
        "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
        "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
      }.freeze

      RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze

      # Day, month name and four-digit year; captures are (day, month, year).
      DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i

      # Same as DATE_PATTERN, but only at the very start of a line.
      DATE_PREFIX = /\A\s*#{DATE_PATTERN}/

      TABLE_HEADER_PATTERN = /Data\s+Detalii tranzactie\s+Debit\s+Credit/

      # Romanian number format: "." as thousands separator, "," before the
      # two decimals (e.g. "1.234,56").
      AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/

      # Lines matching any of these patterns are treated as statement
      # boilerplate: they end the transaction currently being collected and
      # never become part of a description.
      # NOTE(review): the personal names below are presumably signatories
      # printed on the statement, and the trailing "digits/digits" pattern is
      # presumably a page counter -- confirm against real statements.
      NOISE_PATTERNS = [
        /\AExtras de cont\z/,
        /\APentru perioada:/,
        /\AValabil fara semnatura/,
        /\AING Bank/,
        /\ASediul:/,
        /\ANr\. inregistrare/,
        /\ACIF:/,
        /\ATitular cont:/,
        /\ACNP:/,
        /\AStr\. /,
        /\ATip cont:/,
        /\ANumar cont:/,
        /\AMoneda:/,
        /\A\d{6},/,
        /\ARoxana Petria/,
        /\AAlexandra Ilie/,
        /\AȘef Serviciu/,
        /\ASef Serviciu/,
        /\ASucursala/,
        /\AÎN/,
        /\AInformatii despre/,
        /\Ape www\./,
        /\d+\/\d+$/
      ].freeze

      # Extracts all transactions from the text of a statement.
      #
      # @param text [String] the statement text, one printed line per line
      # @return [Array<Transaction>] transactions in the order they appear
      def parse(text)
        transactions = []
        current_currency = nil
        current_table = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          # Checked before the noise patterns, so a currency line neither
          # flushes nor resets the transaction being collected.
          if line.start_with?("Moneda:")
            current_currency = line.split.last
            next
          end

          header = table_header?(line)
          if header || noise?(line)
            # Both a new table header and boilerplate end the pending
            # transaction; the flush still uses the previous table's columns.
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_column_positions(line) if header
            above_lines, below_lines, date_line = [], [], nil
            next
          end

          # Ignore everything until the first table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            # The lines collected above a date line belong to the first
            # transaction after them only. Once a transaction has been
            # flushed they must be dropped, otherwise they would be repeated
            # in the description of every following transaction.
            above_lines = [] if date_line
            date_line = line
            below_lines = []
            next
          end

          (date_line ? below_lines : above_lines) << line
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields every non-blank line with non-breaking spaces replaced by
      # regular spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?

          yield normalized
        end
      end

      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      # Records the character offsets of the "Debit" and "Credit" column
      # titles within a table header line.
      # NOTE(review): offsets are measured on whitespace-stripped lines, so
      # comparing them with amount offsets assumes header and transaction
      # lines share the same leading indentation -- confirm.
      def extract_column_positions(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit")
        }
      end

      # Appends the pending transaction to +transactions+, if there is one
      # and it can be built.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds a Transaction from a date line plus its surrounding lines.
      #
      # The last amount on the date line is the transaction amount; the text
      # between the date and that amount is the main description. The amount
      # is negated when it starts left of the midpoint between the "Debit"
      # and "Credit" header columns.
      #
      # @return [Transaction, nil] nil when the line has no date prefix or
      #   contains no amount
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        amount_match = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }.last
        return if amount_match.nil?

        amount_start = amount_match.begin(0)
        main_description = date_line[date_match.end(0)...amount_start].to_s.strip

        amount = parse_amount(amount_match[0])
        midpoint = (table[:debit] + table[:credit]) / 2
        amount = -amount if amount_start < midpoint

        Transaction.new(
          parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
          build_description(main_description, above_lines, below_lines),
          amount,
          currency
        )
      end

      # @param month_name [String] Romanian month name, any letter case
      # @return [Date]
      def parse_date(day, month_name, year)
        Date.new(year, ROMANIAN_MONTHS.fetch(month_name.downcase), day)
      end

      # Converts a Romanian-formatted amount such as "1.234,56" to a Float.
      def parse_amount(value)
        value.delete(".").sub(",", ".").to_f
      end

      # Joins the non-empty description parts with " | ", in reading order:
      # lines above the date line, the date line's own text, lines below.
      def build_description(main_desc, above_lines, below_lines)
        parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
        parts.reject(&:empty?).join(" | ")
      end
    end
  end
end
|
data/lib/extras_de_cont.rb
CHANGED
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
require
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
3
|
+
|
|
4
|
+
require "zeitwerk"

# Autoloader for the gem. The custom inflection makes the file
# "unicredit.rb" map to the constant UniCredit.
loader = Zeitwerk::Loader.for_gem
loader.inflector.inflect("unicredit" => "UniCredit")
loader.setup
|
|
10
|
+
|
|
11
|
+
|
|
8
12
|
|
|
9
13
|
# The ExtrasDeCont module contains utilities for parsing bank statements.
|
|
10
14
|
module ExtrasDeCont
|
|
11
15
|
# Map of supported banks (symbol → rule class)
|
|
12
16
|
BANK_RULES = {
|
|
13
|
-
brd: Rules::Brd,
|
|
14
|
-
|
|
15
|
-
|
|
17
|
+
brd: ExtrasDeCont::Rules::Brd,
|
|
18
|
+
ing: ExtrasDeCont::Rules::Ing,
|
|
19
|
+
revolut: ExtrasDeCont::Rules::Revolut,
|
|
20
|
+
unicredit: ExtrasDeCont::Rules::UniCredit
|
|
16
21
|
}.freeze
|
|
17
22
|
|
|
18
23
|
class << self
|
|
@@ -26,8 +31,10 @@ module ExtrasDeCont
|
|
|
26
31
|
rule_class = BANK_RULES[bank]
|
|
27
32
|
raise ArgumentError, "Unsupported bank: #{bank}. Supported banks: #{BANK_RULES.keys.join(", ")}" unless rule_class
|
|
28
33
|
|
|
29
|
-
p = Parser.new(file)
|
|
34
|
+
p = ExtrasDeCont::Parser.new(file)
|
|
30
35
|
p.parse_with(rule_class.new)
|
|
31
36
|
end
|
|
32
37
|
end
|
|
33
38
|
end
|
|
39
|
+
|
|
40
|
+
loader.eager_load # optional: ask the Zeitwerk loader to load everything it manages now
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: extras_de_cont
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.2.0
|
|
4
|
+
version: 1.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Denis Nutiu
|
|
@@ -23,6 +23,20 @@ dependencies:
|
|
|
23
23
|
- - "~>"
|
|
24
24
|
- !ruby/object:Gem::Version
|
|
25
25
|
version: '2.15'
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: zeitwerk
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - "~>"
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: '2.8'
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - "~>"
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: '2.8'
|
|
26
40
|
description: |
|
|
27
41
|
A simple library which helps you extract transactions from a PDF bank statement.
|
|
28
42
|
Fine tuned for Romanian bank statements.
|
|
@@ -40,6 +54,7 @@ files:
|
|
|
40
54
|
- lib/extras_de_cont/parser.rb
|
|
41
55
|
- lib/extras_de_cont/rules/base.rb
|
|
42
56
|
- lib/extras_de_cont/rules/brd.rb
|
|
57
|
+
- lib/extras_de_cont/rules/ing.rb
|
|
43
58
|
- lib/extras_de_cont/rules/revolut.rb
|
|
44
59
|
- lib/extras_de_cont/rules/unicredit.rb
|
|
45
60
|
- lib/extras_de_cont/transaction.rb
|