extras_de_cont 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +75 -0
- data/extras_de_cont.gemspec +3 -3
- data/lib/extras_de_cont/rules/brd.rb +185 -0
- data/lib/extras_de_cont.rb +4 -2
- metadata +2 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6dde609d769123f6ba26d34fc0efa79b82ad91b18f6f84d725362480f6c9ef3a
|
|
4
|
+
data.tar.gz: 236b56eb1c323f3dafc2278135424f5cd51edb336fdd45462c0c73a1e1db83af
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 48a679a6514c868d895d68ba56ed821e4528b7b48d9acb90638f16a5d723b2736e7e67afd6fdd85481ec1c9980318e16bc823091f564e6bc1963c554bc505e6c
|
|
7
|
+
data.tar.gz: a31df94d256b541092714e9d54351cd785e46dd0e58120c593699733cb9f90530548bcce9040d5cdfa36c421d775c80335f95a19f24711918549e454c1fee147
|
data/README.md
CHANGED
|
@@ -35,3 +35,78 @@ Run the Revolut parser test with:
|
|
|
35
35
|
```bash
|
|
36
36
|
ruby -Ilib:test test/revolut_rule_test.rb
|
|
37
37
|
```
|
|
38
|
+
|
|
39
|
+
## Supported Banks
|
|
40
|
+
|
|
41
|
+
| Bank | Symbol | Currencies | Features |
|
|
42
|
+
|---|---|---|---|
|
|
43
|
+
| Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
|
|
44
|
+
| UniCredit | `:unicredit` | RON, EUR | Romanian month names, page breaks, transaction markers |
|
|
45
|
+
| BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
|
|
46
|
+
|
|
47
|
+
## Development
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
bundle install # Install dependencies
|
|
51
|
+
bundle exec rake test # Run all tests
|
|
52
|
+
bundle exec rake standard # Run linter
|
|
53
|
+
bundle exec rake build # Build gem
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Contributing
|
|
57
|
+
|
|
58
|
+
Contributions are welcome. Here is how to add a new bank:
|
|
59
|
+
|
|
60
|
+
### 1. Gather information
|
|
61
|
+
|
|
62
|
+
Obtain a sample PDF statement from the bank and extract its text:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
ruby -Ilib -e 'require "extras_de_cont"; puts ExtrasDeCont::Parser.new(ARGV[0]).text' /path/to/statement.pdf
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### 2. Create a rule class
|
|
69
|
+
|
|
70
|
+
Add `lib/extras_de_cont/rules/<bank>.rb` inheriting from `Rules::Base`. See existing rules in `lib/extras_de_cont/rules/` for patterns.
|
|
71
|
+
|
|
72
|
+
### 3. Register the bank
|
|
73
|
+
|
|
74
|
+
Add the require and `BANK_RULES` entry in `lib/extras_de_cont.rb`:
|
|
75
|
+
|
|
76
|
+
```ruby
|
|
77
|
+
require "extras_de_cont/rules/<bank>"
|
|
78
|
+
# ...
|
|
79
|
+
BANK_RULES = {
|
|
80
|
+
brd: Rules::Brd,
|
|
81
|
+
<bank>: Rules::<BankName>,
|
|
82
|
+
revolut: Rules::Revolut,
|
|
83
|
+
unicredit: Rules::UniCredit
|
|
84
|
+
}.freeze
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
### 4. Write tests
|
|
88
|
+
|
|
89
|
+
Add `test/extras_de_cont/rules/<bank>_rule_test.rb`. Use sanitized fixtures — never commit real financial data. Run with:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
bundle exec ruby -Ilib:test test/extras_de_cont/rules/<bank>_rule_test.rb
|
|
93
|
+
bundle exec rake standard
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Code style
|
|
97
|
+
|
|
98
|
+
This project uses [Standard Ruby](https://github.com/standardrb/standard). Run `bundle exec rake standard` before submitting changes.
|
|
99
|
+
|
|
100
|
+
## AI-Assisted Development
|
|
101
|
+
|
|
102
|
+
This project supports AI-assisted development via [OpenCode](https://github.com/anomalyco/opencode).
|
|
103
|
+
|
|
104
|
+
A skill is provided for adding new bank statement parsers. When using an AI coding assistant, it can load the skill
|
|
105
|
+
at `.agents/skills/add-bank-statement/SKILL.md` to guide the process.
|
|
106
|
+
The skill covers rule class creation, registration, RBS signatures, test patterns, and fixture anonymization.
|
|
107
|
+
|
|
108
|
+
The project maintains a [MemPalace](https://github.com/anomalyco/mempalace) knowledge graph (`mempalace.yaml`)
|
|
109
|
+
that organizes the codebase into wings and rooms. AI agents can query this to learn about the project structure,
|
|
110
|
+
existing parsers, test patterns, and conventions without reading every file.
|
|
111
|
+
|
|
112
|
+
For more context, see `AGENTS.md`.
|
data/extras_de_cont.gemspec
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Gem::Specification.new do |s|
|
|
4
4
|
s.name = "extras_de_cont"
|
|
5
|
-
s.version = "1.1.0"
|
|
5
|
+
s.version = "1.2.0"
|
|
6
6
|
s.licenses = ["GPLv3"]
|
|
7
7
|
s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
|
|
8
8
|
s.description = <<~TEXT
|
|
@@ -14,8 +14,8 @@ Gem::Specification.new do |s|
|
|
|
14
14
|
s.authors = ["Denis Nutiu"]
|
|
15
15
|
s.email = "dnutiu@nuculabs.dev"
|
|
16
16
|
s.homepage = "https://nuculabs.dev"
|
|
17
|
-
s.metadata = {
|
|
18
|
-
"rubygems_mfa_required" => "true"
|
|
17
|
+
s.metadata = {"source_code_uri" => "https://gitlab.nuculabs.dev/dnutiu/extras-de-cont",
|
|
18
|
+
"rubygems_mfa_required" => "true"}
|
|
19
19
|
s.required_ruby_version = ">= 3.0.0"
|
|
20
20
|
|
|
21
21
|
# Files to include in the gem
|
|
data/lib/extras_de_cont/rules/brd.rb
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
|
|
4
|
+
require "extras_de_cont/transaction"
|
|
5
|
+
|
|
6
|
+
module ExtrasDeCont
  module Rules
    # Parsing rule for BRD statement text.
    #
    # The parser walks the extracted text line by line. A "Valuta / Currency"
    # line sets the currency, a table header line records the offsets of the
    # Debit/Credit columns, and every line that starts with a dd/mm/yyyy date
    # opens a new transaction. Lines seen before the first date line of a
    # section are kept as "above" lines, lines after a date line as "below"
    # lines; both end up in the transaction description.
    class Brd < Rules::Base
      # A dd/mm/yyyy date at the very beginning of a line.
      DATE_PREFIX = /\A(?<date>\d{2}\/\d{2}\/\d{4})\b/
      # Amount with "." as thousands separator and "," as decimal separator,
      # e.g. "1.234,56".
      AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/
      # Header row of a transactions table.
      TABLE_HEADER_PATTERN = /Data oper\.\s+Descriere operatiune\s+Debit\s+Credit\s+Data val\./

      # Line prefixes that are never part of a transaction. Hitting one of
      # them flushes the pending transaction and resets the collected lines.
      NOISE_PATTERNS = [
        /\APag\./,
        /\ABRD-Groupe/,
        /\ACAPITAL SOCIAL/,
        /\AMihalache/,
        /\ATel:/,
        /\ARO361579/,
        /\A255\/06/,
        /\APJR01INCR/,
        /\ACIFRE CHEIE/,
        /\ACONTURI DETINUTE/,
        /\AFonduri proprii/,
        /\ALimita de credit/,
        /\ACredit neutilizat/,
        /\ADescoperit/,
        /\ANr\. Zile/,
        /\ATotal disponibil/,
        /\ATotal sume/,
        /\ADomicilierea contului/,
        /\AReferinte bancare/,
        /\ATitular \/ Account/,
        /\AIBAN /,
        /\ANumar cont/,
        /\AExtras de cont/,
        /\ADe la \/ From/,
        /\ADocumentul este/,
        /\ACNP\/CUI:/,
        /\ASWIFT/,
        /\AAg\. /,
        /\AStr\. /,
        /\A•/,
        /\Ahttp/,
        /\ASold/,
        /\ATotal debit/,
        /\ACard:MBS/,
        /\ATrans\.Date/
      ].freeze

      # Extracts transactions from the statement text.
      #
      # @param text [String] text of the statement, one statement row per line
      # @return [Array<Transaction>] transactions in the order they appear
      def parse(text)
        transactions = []
        current_table = nil
        current_currency = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          if line.start_with?("Valuta / Currency")
            # The currency code is the last whitespace-separated token.
            current_currency = line.split.last
            next
          end

          if table_header?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_columns(line)
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          if noise?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          # Ignore everything until the first table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            if date_line
              try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
              # The "above" lines belonged to the transaction just flushed.
              # Without this reset they would be repeated in the description
              # of every following transaction of the same section.
              above_lines = []
            end
            date_line = line
            below_lines = []
            next
          end

          if date_line
            below_lines << line
          else
            above_lines << line
          end
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields every non-blank line with non-breaking spaces turned into
      # regular spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?

          yield normalized
        end
      end

      # True when the line is the header row of a transactions table.
      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      # True when the line starts with a dd/mm/yyyy date.
      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      # True when the line starts with one of the known non-transaction prefixes.
      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      # Records the character offsets of the "Debit" and "Credit" column
      # titles within the (already normalized) header line.
      def extract_columns(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit")
        }
      end

      # Appends the pending transaction to +transactions+ when there is one
      # (a date line inside a known table) and it could be built.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds a Transaction from a date line and its surrounding lines.
      #
      # The amount is the first amount on the date line; when the date line
      # carries none, it is the first amount found on the lines below it.
      # Returns nil when no date or no amount can be found.
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        date = parse_date(date_match[:date])

        # Collect MatchData objects (not just strings) to keep the offsets.
        amounts_on_date = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
        amount_below = amounts_on_date.empty?

        if amount_below
          amount_line = below_lines.find { |l| l.match?(AMOUNT_PATTERN) }
          return unless amount_line

          transaction_amount_match = amount_line.match(AMOUNT_PATTERN)
          main_description = date_line[date_match.end(0)..].to_s.strip
          # Drop the trailing dd/mm/yyyy date at the end of the row, if any.
          main_description = main_description.sub(/\s*\d{2}\/\d{2}\/\d{4}\s*\z/, "")
        else
          transaction_amount_match = amounts_on_date.first
          # The description sits between the leading date and the amount.
          main_description = date_line[date_match.end(0)...transaction_amount_match.begin(0)].to_s.strip
        end

        # An amount starting left of the point two thirds of the way from the
        # Debit title to the Credit title is treated as a debit.
        # NOTE(review): offsets are measured on whitespace-stripped lines, so
        # this assumes the header and the amount line share the same leading
        # indentation in the raw text - confirm against real statements.
        midpoint = (table[:debit] + 2 * table[:credit]) / 3
        debit = transaction_amount_match.begin(0) < midpoint
        amount = parse_amount(transaction_amount_match[0])
        amount = -amount if debit

        description_lines = below_lines.map(&:strip).reject(&:empty?)
        if amount_below && description_lines.first
          # Remove the amount from the first below line, unless that would
          # leave the line empty.
          stripped = description_lines.first.sub(AMOUNT_PATTERN, "").strip
          description_lines[0] = stripped unless stripped.empty?
        end
        description_parts = [*above_lines.map(&:strip), main_description, *description_lines]
        description = description_parts.reject(&:empty?).join(" | ")

        Transaction.new(date, description, amount, currency)
      end

      # Converts "dd/mm/yyyy" into a Date.
      def parse_date(value)
        day, month, year = value.split("/").map(&:to_i)
        Date.new(year, month, day)
      end

      # Converts an amount such as "1.234,56" into the Float 1234.56.
      def parse_amount(value)
        value.delete(".").sub(",", ".").to_f
      end
    end
  end
end
|
data/lib/extras_de_cont.rb
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
require "extras_de_cont/parser"
|
|
4
4
|
require "extras_de_cont/rules/base"
|
|
5
|
+
require "extras_de_cont/rules/brd"
|
|
5
6
|
require "extras_de_cont/rules/revolut"
|
|
6
7
|
require "extras_de_cont/rules/unicredit"
|
|
7
8
|
|
|
@@ -9,8 +10,9 @@ require "extras_de_cont/rules/unicredit"
|
|
|
9
10
|
module ExtrasDeCont
|
|
10
11
|
# Map of supported banks (symbol → rule class)
|
|
11
12
|
BANK_RULES = {
|
|
12
|
-
|
|
13
|
-
revolut: Rules::Revolut
|
|
13
|
+
brd: Rules::Brd,
|
|
14
|
+
revolut: Rules::Revolut,
|
|
15
|
+
unicredit: Rules::UniCredit
|
|
14
16
|
}.freeze
|
|
15
17
|
|
|
16
18
|
class << self
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: extras_de_cont
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.0
|
|
4
|
+
version: 1.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Denis Nutiu
|
|
@@ -39,6 +39,7 @@ files:
|
|
|
39
39
|
- lib/extras_de_cont.rb
|
|
40
40
|
- lib/extras_de_cont/parser.rb
|
|
41
41
|
- lib/extras_de_cont/rules/base.rb
|
|
42
|
+
- lib/extras_de_cont/rules/brd.rb
|
|
42
43
|
- lib/extras_de_cont/rules/revolut.rb
|
|
43
44
|
- lib/extras_de_cont/rules/unicredit.rb
|
|
44
45
|
- lib/extras_de_cont/transaction.rb
|