extras_de_cont 1.0.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +75 -0
- data/extras_de_cont.gemspec +3 -3
- data/lib/extras_de_cont/rules/brd.rb +185 -0
- data/lib/extras_de_cont/rules/revolut.rb +58 -11
- data/lib/extras_de_cont/rules/unicredit.rb +197 -0
- data/lib/extras_de_cont.rb +4 -2
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6dde609d769123f6ba26d34fc0efa79b82ad91b18f6f84d725362480f6c9ef3a
|
|
4
|
+
data.tar.gz: 236b56eb1c323f3dafc2278135424f5cd51edb336fdd45462c0c73a1e1db83af
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 48a679a6514c868d895d68ba56ed821e4528b7b48d9acb90638f16a5d723b2736e7e67afd6fdd85481ec1c9980318e16bc823091f564e6bc1963c554bc505e6c
|
|
7
|
+
data.tar.gz: a31df94d256b541092714e9d54351cd785e46dd0e58120c593699733cb9f90530548bcce9040d5cdfa36c421d775c80335f95a19f24711918549e454c1fee147
|
data/README.md
CHANGED
|
@@ -35,3 +35,78 @@ Run the Revolut parser test with:
|
|
|
35
35
|
```bash
|
|
36
36
|
ruby -Ilib:test test/revolut_rule_test.rb
|
|
37
37
|
```
|
|
38
|
+
|
|
39
|
+
## Supported Banks
|
|
40
|
+
|
|
41
|
+
| Bank | Symbol | Currencies | Features |
|
|
42
|
+
|---|---|---|---|
|
|
43
|
+
| Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
|
|
44
|
+
| UniCredit | `:unicredit` | RON, EUR | Romanian month names, page breaks, transaction markers |
|
|
45
|
+
| BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
|
|
46
|
+
|
|
47
|
+
## Development
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
bundle install # Install dependencies
|
|
51
|
+
bundle exec rake test # Run all tests
|
|
52
|
+
bundle exec rake standard # Run linter
|
|
53
|
+
bundle exec rake build # Build gem
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Contributing
|
|
57
|
+
|
|
58
|
+
Contributions are welcome. Here is how to add a new bank:
|
|
59
|
+
|
|
60
|
+
### 1. Gather information
|
|
61
|
+
|
|
62
|
+
Obtain a sample PDF statement from the bank and extract its text:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
ruby -Ilib -e 'require "extras_de_cont"; puts ExtrasDeCont::Parser.new(ARGV[0]).text' /path/to/statement.pdf
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 2. Create a rule class
|
|
69
|
+
|
|
70
|
+
Add `lib/extras_de_cont/rules/<bank>.rb` inheriting from `Rules::Base`. See existing rules in `lib/extras_de_cont/rules/` for patterns.
|
|
71
|
+
|
|
72
|
+
### 3. Register the bank
|
|
73
|
+
|
|
74
|
+
Add the require and `BANK_RULES` entry in `lib/extras_de_cont.rb`:
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
require "extras_de_cont/rules/<bank>"
|
|
78
|
+
# ...
|
|
79
|
+
BANK_RULES = {
|
|
80
|
+
brd: Rules::Brd,
|
|
81
|
+
<bank>: Rules::<BankName>,
|
|
82
|
+
revolut: Rules::Revolut,
|
|
83
|
+
unicredit: Rules::UniCredit
|
|
84
|
+
}.freeze
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 4. Write tests
|
|
88
|
+
|
|
89
|
+
Add `test/extras_de_cont/rules/<bank>_rule_test.rb`. Use sanitized fixtures — never commit real financial data. Run with:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
bundle exec ruby -Ilib:test test/extras_de_cont/rules/<bank>_rule_test.rb
|
|
93
|
+
bundle exec rake standard
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Code style
|
|
97
|
+
|
|
98
|
+
This project uses [Standard Ruby](https://github.com/standardrb/standard). Run `bundle exec rake standard` before submitting changes.
|
|
99
|
+
|
|
100
|
+
## AI-Assisted Development
|
|
101
|
+
|
|
102
|
+
This project supports AI-assisted development via [OpenCode](https://github.com/anomalyco/opencode).
|
|
103
|
+
|
|
104
|
+
A skill is provided for adding new bank statement parsers. An AI coding assistant can load the skill
|
|
105
|
+
at `.agents/skills/add-bank-statement/SKILL.md` to guide the process.
|
|
106
|
+
The skill covers rule class creation, registration, RBS signatures, test patterns, and fixture anonymization.
|
|
107
|
+
|
|
108
|
+
The project maintains a [MemPalace](https://github.com/anomalyco/mempalace) knowledge graph (`mempalace.yaml`)
|
|
109
|
+
that organizes the codebase into wings and rooms. AI agents can query this to learn about the project structure,
|
|
110
|
+
existing parsers, test patterns, and conventions without reading every file.
|
|
111
|
+
|
|
112
|
+
For more context, see `AGENTS.md`.
|
data/extras_de_cont.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "extras_de_cont"
|
|
5
|
-
s.version = "1.0.2"
|
|
5
|
+
s.version = "1.2.0"
|
|
6
6
|
s.licenses = ["GPLv3"]
|
|
7
7
|
s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
|
|
8
8
|
s.description = <<~TEXT
|
|
@@ -14,8 +14,8 @@ Gem::Specification.new do |s|
|
|
|
14
14
|
s.authors = ["Denis Nutiu"]
|
|
15
15
|
s.email = "dnutiu@nuculabs.dev"
|
|
16
16
|
s.homepage = "https://nuculabs.dev"
|
|
17
|
-
s.metadata = {
|
|
18
|
-
"rubygems_mfa_required" => "true"
|
|
17
|
+
s.metadata = {"source_code_uri" => "https://gitlab.nuculabs.dev/dnutiu/extras-de-cont",
|
|
18
|
+
"rubygems_mfa_required" => "true"}
|
|
19
19
|
s.required_ruby_version = ">= 3.0.0"
|
|
20
20
|
|
|
21
21
|
# Files to include in the gem
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "extras_de_cont/transaction"
|
|
5
|
+
|
|
6
|
+
module ExtrasDeCont
  module Rules
    # Extracts transactions from the plain text of a BRD account statement.
    #
    # The text is processed line by line. A transaction is anchored on a row
    # that starts with a dd/mm/yyyy date; free-text lines seen before that row
    # ("above" lines) and after it ("below" lines) are folded into the
    # description. The amount is taken from the date row or, when the date row
    # carries none, from the first below line that contains one.
    class Brd < Rules::Base
      # Date (dd/mm/yyyy) at the very start of a transaction row.
      DATE_PREFIX = /\A(?<date>\d{2}\/\d{2}\/\d{4})\b/

      # Romanian number format: "." groups thousands, "," precedes two decimals.
      # Grouped and ungrouped forms are separate alternatives so an ungrouped
      # value such as "1234,56" is matched whole instead of as "234,56".
      AMOUNT_PATTERN = /(?:\d{1,3}(?:\.\d{3})+|\d+),\d{2}/

      # Column header row that opens a transaction table.
      TABLE_HEADER_PATTERN = /Data oper\.\s+Descriere operatiune\s+Debit\s+Credit\s+Data val\./

      # Line prefixes that never belong to a transaction. Hitting one of them
      # also closes the transaction currently being collected.
      NOISE_PATTERNS = [
        /\APag\./,
        /\ABRD-Groupe/,
        /\ACAPITAL SOCIAL/,
        /\AMihalache/,
        /\ATel:/,
        /\ARO361579/,
        /\A255\/06/,
        /\APJR01INCR/,
        /\ACIFRE CHEIE/,
        /\ACONTURI DETINUTE/,
        /\AFonduri proprii/,
        /\ALimita de credit/,
        /\ACredit neutilizat/,
        /\ADescoperit/,
        /\ANr\. Zile/,
        /\ATotal disponibil/,
        /\ATotal sume/,
        /\ADomicilierea contului/,
        /\AReferinte bancare/,
        /\ATitular \/ Account/,
        /\AIBAN /,
        /\ANumar cont/,
        /\AExtras de cont/,
        /\ADe la \/ From/,
        /\ADocumentul este/,
        /\ACNP\/CUI:/,
        /\ASWIFT/,
        /\AAg\. /,
        /\AStr\. /,
        /\A•/,
        /\Ahttp/,
        /\ASold/,
        /\ATotal debit/,
        /\ACard:MBS/,
        /\ATrans\.Date/
      ].freeze

      # Parses statement text into transactions.
      #
      # @param text [String] text extracted from the statement
      # @return [Array<Transaction>] transactions in the order they appear
      # @raise [Date::Error] if a date row carries an impossible calendar date
      def parse(text)
        transactions = []
        current_table = nil
        current_currency = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          # The currency code is the last token of the "Valuta / Currency" line.
          if line.start_with?("Valuta / Currency")
            current_currency = line.split.last
            next
          end

          if table_header?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_columns(line)
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          if noise?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          # Nothing is collected until a table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            if date_line
              try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
              # The above lines belonged to the row just flushed; without this
              # reset they would be repeated in every following description.
              above_lines = []
            end
            date_line = line
            below_lines = []
            next
          end

          if date_line
            below_lines << line
          else
            above_lines << line
          end
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields each non-blank line with non-breaking spaces converted to plain
      # spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?
          yield normalized
        end
      end

      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      # Records the character offsets of the "Debit" and "Credit" headings in
      # the (already stripped) header line.
      def extract_columns(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit")
        }
      end

      # Appends the pending transaction, if one can be built, to +transactions+.
      # Does nothing when no date row or no table header has been seen yet.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds one Transaction from a date row and its surrounding lines.
      #
      # @return [Transaction, nil] nil when no amount is found on the date row
      #   or on any below line
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        date = parse_date(date_match[:date])

        amounts_on_date = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }

        if amounts_on_date.any?
          # Amount on the date row: the description is whatever sits between
          # the date and the first amount.
          transaction_amount_match = amounts_on_date.first
          main_description = date_line[date_match.end(0)...transaction_amount_match.begin(0)].to_s.strip
        else
          # Amount on a following line: take the first below line that has one.
          amount_line = below_lines.find { |l| l.match?(AMOUNT_PATTERN) }
          return unless amount_line

          transaction_amount_match = amount_line.match(AMOUNT_PATTERN)
          main_description = date_line[date_match.end(0)..].to_s.strip
          # Drop a trailing dd/mm/yyyy (second date on the row) from the text.
          main_description = main_description.sub(/\s*\d{2}\/\d{2}\/\d{4}\s*\z/, "")
        end

        # Boundary two thirds of the way from the Debit heading to the Credit
        # heading; an amount starting left of it is stored as a negative debit.
        # NOTE(review): offsets are measured on stripped lines, so this assumes
        # amount lines keep their column alignment after stripping - confirm
        # against real statements, especially for below-line amounts.
        midpoint = (table[:debit] + 2 * table[:credit]) / 3
        debit = transaction_amount_match.begin(0) < midpoint
        amount = parse_amount(transaction_amount_match[0])
        amount = -amount if debit

        description_lines = below_lines.map(&:strip).reject(&:empty?)
        if !amounts_on_date.any? && description_lines.first
          # Remove the amount text from the first below line, unless that would
          # leave the line empty.
          stripped = description_lines.first.sub(AMOUNT_PATTERN, "").strip
          description_lines[0] = stripped unless stripped.empty?
        end
        description_parts = [*above_lines.map(&:strip), main_description, *description_lines]
        description = description_parts.reject(&:empty?).join(" | ")

        Transaction.new(date, description, amount, currency)
      end

      # Converts "dd/mm/yyyy" into a Date.
      def parse_date(value)
        day, month, year = value.split("/").map(&:to_i)
        Date.new(year, month, day)
      end

      # Converts a Romanian-formatted amount ("1.234,56") into a Float.
      def parse_amount(value)
        value.delete(".").sub(",", ".").to_f
      end
    end
  end
end
|
|
@@ -11,24 +11,45 @@ module ExtrasDeCont
|
|
|
11
11
|
"Pending from ",
|
|
12
12
|
"Account transactions from ",
|
|
13
13
|
"Reverted from ",
|
|
14
|
-
"Deposit transactions from "
|
|
14
|
+
"Deposit transactions from ",
|
|
15
|
+
"Transactions from "
|
|
15
16
|
].freeze
|
|
16
17
|
|
|
17
18
|
DOCUMENT_NOISE_HEADERS = [
|
|
19
|
+
"Account statement",
|
|
18
20
|
"Balance summary",
|
|
19
21
|
"The balance on your statement might differ",
|
|
22
|
+
"There were no transactions during this period",
|
|
23
|
+
"Transaction types",
|
|
24
|
+
"Your funds are held and protected by a licensed bank",
|
|
20
25
|
"Report lost or stolen card",
|
|
21
26
|
"+",
|
|
22
27
|
"Get help directly in app",
|
|
28
|
+
"Get help directly In app",
|
|
23
29
|
"Scan the QR code",
|
|
24
30
|
"RON Statement",
|
|
31
|
+
" Statement",
|
|
25
32
|
"Generated on the ",
|
|
26
33
|
"Revolut Bank UAB",
|
|
27
34
|
"© "
|
|
28
35
|
].freeze
|
|
29
36
|
|
|
30
|
-
|
|
31
|
-
|
|
37
|
+
CURRENCY_SYMBOLS = {
|
|
38
|
+
"$" => "USD",
|
|
39
|
+
"€" => "EUR",
|
|
40
|
+
"£" => "GBP",
|
|
41
|
+
"zł" => "PLN",
|
|
42
|
+
"Kč" => "CZK",
|
|
43
|
+
"Ft" => "HUF",
|
|
44
|
+
"лв" => "BGN",
|
|
45
|
+
"₺" => "TRY",
|
|
46
|
+
"₴" => "UAH"
|
|
47
|
+
}.freeze
|
|
48
|
+
DATE_FORMATS = ["%b %e, %Y", "%e %b %Y"].freeze
|
|
49
|
+
DATE_PREFIX = /\A(?<date>(?:[A-Z][a-z]{2} \d{1,2}, \d{4}|\d{1,2} [A-Z][a-z]{2} \d{4}))\b/
|
|
50
|
+
NUMBER = /-?(?:\d{1,3}(?:[ ,]\d{3})+|\d+)\.\d{2}/
|
|
51
|
+
CURRENCY_SYMBOL = Regexp.union(CURRENCY_SYMBOLS.keys.sort_by { |symbol| -symbol.length })
|
|
52
|
+
AMOUNT = /(?:#{NUMBER} [A-Z]{3}|#{CURRENCY_SYMBOL}#{NUMBER}|#{NUMBER} ?#{CURRENCY_SYMBOL})/
|
|
32
53
|
|
|
33
54
|
def parse(text)
|
|
34
55
|
transactions = []
|
|
@@ -105,16 +126,41 @@ module ExtrasDeCont
|
|
|
105
126
|
parse_date(match[:date]),
|
|
106
127
|
description,
|
|
107
128
|
amount,
|
|
108
|
-
amount_string
|
|
129
|
+
parse_currency(amount_string)
|
|
109
130
|
)
|
|
110
131
|
end
|
|
111
132
|
|
|
112
133
|
# Parses a Revolut statement date by trying each entry of DATE_FORMATS in turn.
#
# @param value [String] date text such as "Jan 5, 2024" or "5 Jan 2024"
# @return [Date] the date produced by the first format that parses
# @raise [Date::Error] if none of the formats can parse +value+
def parse_date(value)
  DATE_FORMATS.each do |format|
    return Date.strptime(value, format)
  rescue Date::Error
    next
  end

  # Without this, the method would fall through and return the DATE_FORMATS
  # array itself (the value of #each), which callers would store as the date.
  raise Date::Error, "unrecognized date: #{value.inspect}"
end
|
|
115
140
|
|
|
116
141
|
# Converts an amount string into a Float: the currency marker is removed via
# numeric_value, then comma and space characters are dropped before conversion.
def parse_amount(value)
  digits = numeric_value(value)
  digits.delete(", ").to_f
end
|
|
144
|
+
|
|
145
|
+
# Resolves the currency of an amount string. A leading or trailing symbol is
# translated through CURRENCY_SYMBOLS; otherwise the last whitespace-separated
# token of the string is returned.
def parse_currency(value)
  symbol = currency_symbol(value)
  symbol ? CURRENCY_SYMBOLS.fetch(symbol) : value.split.last
end
|
|
151
|
+
|
|
152
|
+
# Returns the amount string without its currency marker: a leading symbol, a
# trailing symbol (plus the whitespace around what remains), or a trailing
# three-capital-letter code preceded by whitespace.
def numeric_value(value)
  symbol = currency_symbol(value)
  return value.sub(/\s+[A-Z]{3}\z/, "") unless symbol

  if value.start_with?(symbol)
    value.delete_prefix(symbol)
  else
    value.delete_suffix(symbol).strip
  end
end
|
|
159
|
+
|
|
160
|
+
# Returns the first key of CURRENCY_SYMBOLS that +value+ starts or ends with,
# or nil when there is none.
def currency_symbol(value)
  CURRENCY_SYMBOLS.each_key do |candidate|
    return candidate if value.start_with?(candidate) || value.end_with?(candidate)
  end
  nil
end
|
|
119
165
|
|
|
120
166
|
def section_header?(line)
|
|
@@ -147,11 +193,12 @@ module ExtrasDeCont
|
|
|
147
193
|
amount_matches = row.to_enum(:scan, AMOUNT).map { Regexp.last_match }
|
|
148
194
|
return if amount_matches.empty?
|
|
149
195
|
|
|
150
|
-
transaction_match =
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
196
|
+
transaction_match =
|
|
197
|
+
if table.fetch(:has_balance)
|
|
198
|
+
amount_matches[-2] if amount_matches.length > 1
|
|
199
|
+
else
|
|
200
|
+
amount_matches[-1]
|
|
201
|
+
end
|
|
155
202
|
return if transaction_match.nil?
|
|
156
203
|
|
|
157
204
|
description = row[date_match.end(0)...transaction_match.begin(0)].to_s.strip
|
|
@@ -1,8 +1,205 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require "date"
|
|
4
|
+
require "extras_de_cont/transaction"
|
|
5
|
+
|
|
3
6
|
module ExtrasDeCont
  module Rules
    # Extracts transactions from the plain text of a UniCredit account statement.
    #
    # The text is processed line by line. A transaction is anchored on a row
    # that starts with a Romanian long-form date ("5 ianuarie 2024") and holds
    # at least two amounts; the second-to-last amount on the row is the
    # transaction value. Lines before and after the row are folded into the
    # description, and certain prefixes (NEW_TRANSACTION_MARKERS) start the
    # lead-in of the next transaction.
    class UniCredit < Rules::Base
      ROMANIAN_MONTHS = {
        "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
        "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
        "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
      }.freeze

      RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze
      # Day, month name (case-insensitive) and four-digit year as captures 1-3.
      DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i
      DATE_PREFIX = /\A\s*#{DATE_PATTERN}/

      # Column header row that opens a transaction table.
      TABLE_HEADER_PATTERN = /Data\s+Descriere\s+Debit\s+Credit\s+Sold/

      # Exact lines that separate parts of the statement.
      SECTION_HEADERS = [
        "TRANZACȚII",
        "SUMAR CONT",
        "EXTRAS DE CONT"
      ].freeze

      # Line prefixes that never belong to a transaction. Hitting one of them
      # also closes the transaction currently being collected.
      NOISE_PATTERNS = [
        /\AUniCredit Bank S\.A\./,
        /\ABulevardul/,
        /\ASector \d/,
        /\ATel:/,
        /\AEmail:/,
        /\Aunicredit\.ro/,
        /\ACapital social:/,
        /\APrezentul extras/,
        /\AFondurile disponibile/,
        /\APentru mai multe/,
        /\ANUME CLIENT:/,
        /\AADRESA:/,
        /\ASUCURSALA:/,
        /\ADATA EXTRAS CONT/,
        /\APERIOADA/,
        /\ATIP CONT:/,
        /\AIBAN:/,
        /\AMONEDA:/,
        /\AOperator de date/,
        /\ASold inițial/,
        /\ASold final/
      ].freeze

      # Prefixes of lines that begin the lead-in text of the next transaction.
      NEW_TRANSACTION_MARKERS = [
        /\A\+CMS CLT-/,
        /\A\+GPP/,
        /\APlata electronica/,
        /\APlata Instant/,
        /\AIncasare Instant/,
        /\ATransfer electronic/
      ].freeze

      # Amount with "." before two decimals and "." or "," between thousands
      # groups. Grouped and ungrouped forms are separate alternatives so an
      # ungrouped value such as "1234.56" is matched whole instead of as
      # "234.56".
      AMOUNT_PATTERN = /(?:\d{1,3}(?:[.,]\d{3})+|\d+)\.\d{2}/
      # Currency code inside the balance column heading, e.g. "Sold(RON)".
      CURRENCY_FROM_HEADER = /Sold\(([A-Z]{3})\)/

      # Parses statement text into transactions.
      #
      # @param text [String] text extracted from the statement
      # @return [Array<Transaction>] transactions in the order they appear
      # @raise [Date::Error] if a date row carries an impossible calendar date
      def parse(text)
        transactions = []
        current_currency = nil
        current_table = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          # Checked on every line; the line is still processed below.
          if (m = line.match(CURRENCY_FROM_HEADER))
            current_currency = m[1]
          end

          if table_header?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_column_positions(line)
            above_lines, below_lines, date_line = [], [], nil
            next
          end

          if noise?(line) || section_header?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            above_lines, below_lines, date_line = [], [], nil
            next
          end

          # Nothing is collected until a table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            if date_line
              try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
              # The above lines belonged to the row just flushed; without this
              # reset they would be repeated in the next description as well.
              above_lines = []
            end
            date_line = line
            below_lines = []
            next
          end

          if date_line
            if new_transaction_marker?(line)
              # The marker closes the current transaction and becomes the first
              # above line of the next one.
              try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
              date_line, below_lines = nil, []
              above_lines = [line]
            else
              below_lines << line
            end
          else
            above_lines << line
          end
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields each non-blank line with non-breaking spaces converted to plain
      # spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?

          yield normalized
        end
      end

      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      def section_header?(line)
        SECTION_HEADERS.include?(line)
      end

      def new_transaction_marker?(line)
        NEW_TRANSACTION_MARKERS.any? { |pattern| line.match?(pattern) }
      end

      # Records the character offsets of the "Debit", "Credit" and "Sold"
      # headings in the (already stripped) header line.
      def extract_column_positions(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit"),
          sold: line.index("Sold")
        }
      end

      # Appends the pending transaction, if one can be built, to +transactions+.
      # Does nothing when no date row or no table header has been seen yet.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds one Transaction from a date row and its surrounding lines.
      #
      # @return [Transaction, nil] nil when the row holds fewer than two amounts
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        amounts = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
        return if amounts.size < 2

        # The last amount on the row is skipped; the one before it is used.
        transaction_amount_match = amounts[-2]
        description_start = date_match.end(0)
        description_end = transaction_amount_match.begin(0)
        main_description = date_line[description_start...description_end].to_s.strip

        amount = parse_amount(transaction_amount_match[0])
        # An amount starting left of the point halfway between the Debit and
        # Credit headings is stored as a negative debit.
        midpoint = (table[:debit] + table[:credit]) / 2
        amount = -amount if transaction_amount_match.begin(0) < midpoint

        description = build_description(main_description, above_lines, below_lines)

        Transaction.new(
          parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
          description,
          amount,
          currency || extract_currency_from_header(date_line)
        )
      end

      # Converts text matched by AMOUNT_PATTERN into a Float. Every "." or ","
      # before the final ".dd" is a group separator and is removed, so both
      # "1,234.56" and "1.234.56" yield 1234.56.
      def parse_amount(value)
        whole = value[0...-3].delete(".,")
        "#{whole}#{value[-3..]}".to_f
      end

      # Builds a Date from a day, a Romanian month name (any case) and a year.
      def parse_date(day, month_name, year)
        month = ROMANIAN_MONTHS[month_name.downcase]
        Date.new(year, month, day)
      end

      # Joins the non-empty description fragments with " | ".
      def build_description(main_desc, above_lines, below_lines)
        parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
        parts.reject(&:empty?).join(" | ")
      end

      # Fallback currency lookup on the date row itself; nil when absent.
      def extract_currency_from_header(date_line)
        date_line[CURRENCY_FROM_HEADER, 1]
      end
    end
  end
end
|
data/lib/extras_de_cont.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "extras_de_cont/parser"
|
|
4
4
|
require "extras_de_cont/rules/base"
|
|
5
|
+
require "extras_de_cont/rules/brd"
|
|
5
6
|
require "extras_de_cont/rules/revolut"
|
|
6
7
|
require "extras_de_cont/rules/unicredit"
|
|
7
8
|
|
|
@@ -9,8 +10,9 @@ require "extras_de_cont/rules/unicredit"
|
|
|
9
10
|
module ExtrasDeCont
|
|
10
11
|
# Map of supported banks (symbol → rule class)
|
|
11
12
|
BANK_RULES = {
|
|
12
|
-
|
|
13
|
-
revolut: Rules::Revolut
|
|
13
|
+
brd: Rules::Brd,
|
|
14
|
+
revolut: Rules::Revolut,
|
|
15
|
+
unicredit: Rules::UniCredit
|
|
14
16
|
}.freeze
|
|
15
17
|
|
|
16
18
|
class << self
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: extras_de_cont
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.0.2
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Denis Nutiu
|
|
@@ -39,6 +39,7 @@ files:
|
|
|
39
39
|
- lib/extras_de_cont.rb
|
|
40
40
|
- lib/extras_de_cont/parser.rb
|
|
41
41
|
- lib/extras_de_cont/rules/base.rb
|
|
42
|
+
- lib/extras_de_cont/rules/brd.rb
|
|
42
43
|
- lib/extras_de_cont/rules/revolut.rb
|
|
43
44
|
- lib/extras_de_cont/rules/unicredit.rb
|
|
44
45
|
- lib/extras_de_cont/transaction.rb
|