extras_de_cont 1.1.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '029f7b904cb76faa331a686ec02f81a0fad9ffce39e0bea1c05e0ed95a6bb34c'
4
- data.tar.gz: c98d9dc06a00db1cface2c7ea7b58f563502c2fa58da4481d2a928e99fe08766
3
+ metadata.gz: df78cdd18c6c97610fd739d7154922d9d5cafe9d8460c54104429698d93d0479
4
+ data.tar.gz: 8ea76ba23c3c32f5a4d39c7610abdb3dbef95a8c96cb831592316c81165bd610
5
5
  SHA512:
6
- metadata.gz: cd20be09910a3ca8ad2a13bc07ce619d76950e080e3bb927a2f3dee0f24441f173ad842437dfc9f8196b24579d5976519c5558031aa8cd33e93cd5263e229705
7
- data.tar.gz: 27c5cda6efa0043af432ac596c7d2ff736dd3f26c520f3cd2ea79a45506eb41a48d5c9ca236432d1f9999e73f9d8c11dabc201e4ee23cab82f08e9bc7a351b6b
6
+ metadata.gz: ccfbbd9f393fe5de67d966edbdfae2b50e2cc23a40d8ec5def3e532176648e5243b08e22a7387423ecce688c81e0e0f9dd78a5b493678f620b079ee0e63b5ab5
7
+ data.tar.gz: 44b5c55785d2b93c0c85797826a90fc56733c9fa2dc60e494e61ee2e3041857498d32603cf410ac9c70998f463971b72134f4a9d8cd986374249713c3fe8a124
data/README.md CHANGED
@@ -35,3 +35,78 @@ Run the Revolut parser test with:
35
35
  ```bash
36
36
  ruby -Ilib:test test/revolut_rule_test.rb
37
37
  ```
38
+
39
+ ## Supported Banks
40
+
41
+ | Bank | Symbol | Currencies | Features |
42
+ |---|---|---|---|
43
+ | Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
44
+ | UniCredit | `:unicredit` | RON, EUR | Romanian month names, page breaks, transaction markers |
45
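+ | BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
+ | ING | `:ing` | As printed on the statement (`Moneda:` line) | Romanian month names, Romanian number format |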
46
+
47
+ ## Development
48
+
49
+ ```bash
50
+ bundle install # Install dependencies
51
+ bundle exec rake test # Run all tests
52
+ bundle exec rake standard # Run linter
53
+ bundle exec rake build # Build gem
54
+ ```
55
+
56
+ ## Contributing
57
+
58
+ Contributions are welcome. Here is how to add a new bank:
59
+
60
+ ### 1. Gather information
61
+
62
+ Obtain a sample PDF statement from the bank and extract its text:
63
+
64
+ ```bash
65
+ ruby -Ilib -e 'require "extras_de_cont"; puts ExtrasDeCont::Parser.new(ARGV[0]).text' /path/to/statement.pdf
66
+ ```
67
+
68
+ ### 2. Create a rule class
69
+
70
+ Add `lib/extras_de_cont/rules/<bank>.rb` inheriting from `Rules::Base`. See existing rules in `lib/extras_de_cont/rules/` for patterns.
71
+
72
+ ### 3. Register the bank
73
+
74
+ Add the require and `BANK_RULES` entry in `lib/extras_de_cont.rb`:
75
+
76
+ ```ruby
77
+ require "extras_de_cont/rules/<bank>"
78
+ # ...
79
+ BANK_RULES = {
80
+ brd: Rules::Brd,
81
+ <bank>: Rules::<BankName>,
82
+ revolut: Rules::Revolut,
83
+ unicredit: Rules::UniCredit
84
+ }.freeze
85
+ ```
86
+
87
+ ### 4. Write tests
88
+
89
+ Add `test/extras_de_cont/rules/<bank>_rule_test.rb`. Use sanitized fixtures — never commit real financial data. Run with:
90
+
91
+ ```bash
92
+ bundle exec ruby -Ilib:test test/extras_de_cont/rules/<bank>_rule_test.rb
93
+ bundle exec rake standard
94
+ ```
95
+
96
+ ### Code style
97
+
98
+ This project uses [Standard Ruby](https://github.com/standardrb/standard). Run `bundle exec rake standard` before submitting changes.
99
+
100
+ ## AI-Assisted Development
101
+
102
+ This project supports AI-assisted development via [OpenCode](https://github.com/anomalyco/opencode).
103
+
104
+ A skill is provided for adding new bank statement parsers. When using an AI coding assistant, it can load the skill
105
+ at `.agents/skills/add-bank-statement/SKILL.md` to guide the process.
106
+ The skill covers rule class creation, registration, RBS signatures, test patterns, and fixture anonymization.
107
+
108
+ The project maintains a [MemPalace](https://github.com/anomalyco/mempalace) knowledge graph (`mempalace.yaml`)
109
+ that organizes the codebase into wings and rooms. AI agents can query this to learn about the project structure,
110
+ existing parsers, test patterns, and conventions without reading every file.
111
+
112
+ For more context, see `AGENTS.md`.
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "extras_de_cont"
5
- s.version = "1.1.0"
5
+ s.version = "1.3.0"
6
6
  s.licenses = ["GPLv3"]
7
7
  s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
8
8
  s.description = <<~TEXT
@@ -14,8 +14,8 @@ Gem::Specification.new do |s|
14
14
  s.authors = ["Denis Nutiu"]
15
15
  s.email = "dnutiu@nuculabs.dev"
16
16
  s.homepage = "https://nuculabs.dev"
17
- s.metadata = { "source_code_uri" => "https://gitlab.nuculabs.dev/dnutiu/extras-de-cont",
18
- "rubygems_mfa_required" => "true" }
17
+ s.metadata = {"source_code_uri" => "https://gitlab.nuculabs.dev/dnutiu/extras-de-cont",
18
+ "rubygems_mfa_required" => "true"}
19
19
  s.required_ruby_version = ">= 3.0.0"
20
20
 
21
21
  # Files to include in the gem
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "extras_de_cont/transaction"
5
+
6
module ExtrasDeCont
  module Rules
    # Rule that turns the extracted text of a BRD account statement into
    # Transaction objects.
    #
    # The text is processed line by line. A transaction is anchored on a
    # "date line" (a line starting with dd/mm/yyyy); surrounding lines are
    # collected as extra description. The sign of the amount is decided by
    # comparing the amount's column with the positions of the "Debit" and
    # "Credit" labels in the most recent table header.
    class Brd < Rules::Base
      # A transaction line starts with the operation date (dd/mm/yyyy).
      DATE_PREFIX = /\A(?<date>\d{2}\/\d{2}\/\d{4})\b/
      # Amounts use "." as thousands separator and "," as decimal separator.
      AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/
      # Header row of a transaction table; it provides the column positions.
      TABLE_HEADER_PATTERN = /Data oper\.\s+Descriere operatiune\s+Debit\s+Credit\s+Data val\./

      # Lines matching any of these patterns are not part of a transaction.
      # Meeting one also ends the transaction that is currently being
      # collected (see #parse).
      NOISE_PATTERNS = [
        /\APag\./,
        /\ABRD-Groupe/,
        /\ACAPITAL SOCIAL/,
        /\AMihalache/,
        /\ATel:/,
        /\ARO361579/,
        /\A255\/06/,
        /\APJR01INCR/,
        /\ACIFRE CHEIE/,
        /\ACONTURI DETINUTE/,
        /\AFonduri proprii/,
        /\ALimita de credit/,
        /\ACredit neutilizat/,
        /\ADescoperit/,
        /\ANr\. Zile/,
        /\ATotal disponibil/,
        /\ATotal sume/,
        /\ADomicilierea contului/,
        /\AReferinte bancare/,
        /\ATitular \/ Account/,
        /\AIBAN /,
        /\ANumar cont/,
        /\AExtras de cont/,
        /\ADe la \/ From/,
        /\ADocumentul este/,
        /\ACNP\/CUI:/,
        /\ASWIFT/,
        /\AAg\. /,
        /\AStr\. /,
        /\A•/,
        /\Ahttp/,
        /\ASold/,
        /\ATotal debit/,
        /\ACard:MBS/,
        /\ATrans\.Date/
      ].freeze

      # Extracts all transactions from the statement text.
      #
      # @param text [String] text of the statement, one statement line per line
      # @return [Array] transactions in the order they appear; each is built
      #   with Transaction.new(date, description, amount, currency), where the
      #   amount is a Float that is negative for debits and the currency is
      #   the last token of the most recent "Valuta / Currency" line (nil if
      #   none was seen)
      def parse(text)
        transactions = []
        current_table = nil
        current_currency = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          if line.start_with?("Valuta / Currency")
            current_currency = line.split.last
            next
          end

          # A table header or a noise line closes the pending transaction and
          # discards everything collected so far. The header is tested first,
          # and the flush still uses the previous table's column positions.
          header = table_header?(line)
          if header || noise?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_columns(line) if header
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          # Ignore everything until the first table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            # The lines collected above the previous date line belong to the
            # transaction that was just flushed; drop them so they are not
            # repeated in the description of every following transaction.
            above_lines = [] unless date_line.nil?
            date_line = line
            below_lines = []
            next
          end

          (date_line ? below_lines : above_lines) << line
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields every non-blank line with non-breaking spaces replaced by
      # regular spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?

          yield normalized
        end
      end

      # True when the line is the header row of a transaction table.
      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      # True when the line starts with a dd/mm/yyyy date.
      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      # True when the line matches one of NOISE_PATTERNS.
      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      # Character offsets of the "Debit" and "Credit" labels in a header line.
      def extract_columns(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit")
        }
      end

      # Appends the pending transaction to +transactions+, if there is one
      # and it can be built. Does nothing without a date line or a table.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds a transaction from a date line and its surrounding lines.
      #
      # The amount is the first amount on the date line; when the date line
      # carries no amount, it is the first amount found on the lines below.
      # Returns nil when no date or no amount can be found.
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        date = parse_date(date_match[:date])

        # Collect MatchData (not just strings) so the column of each amount
        # is available for the debit/credit decision.
        amounts_on_date = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }

        if amounts_on_date.empty?
          amount_line = below_lines.find { |l| l.match?(AMOUNT_PATTERN) }
          return unless amount_line

          transaction_amount_match = amount_line.match(AMOUNT_PATTERN)
          main_description = date_line[date_match.end(0)..].to_s.strip
          # Drop a trailing dd/mm/yyyy date from the description.
          main_description = main_description.sub(/\s*\d{2}\/\d{2}\/\d{4}\s*\z/, "")
        else
          transaction_amount_match = amounts_on_date.first
          main_description = date_line[date_match.end(0)...transaction_amount_match.begin(0)].to_s.strip
        end

        # Threshold located two thirds of the way from the "Debit" label to
        # the "Credit" label; an amount starting left of it is a debit.
        midpoint = (table[:debit] + 2 * table[:credit]) / 3
        debit = transaction_amount_match.begin(0) < midpoint
        amount = parse_amount(transaction_amount_match[0])
        amount = -amount if debit

        description_lines = below_lines.map(&:strip).reject(&:empty?)
        if amounts_on_date.empty? && description_lines.first
          # Remove the amount from the first line below the date line, unless
          # that would leave the line empty.
          stripped = description_lines.first.sub(AMOUNT_PATTERN, "").strip
          description_lines[0] = stripped unless stripped.empty?
        end
        description_parts = [*above_lines.map(&:strip), main_description, *description_lines]
        description = description_parts.reject(&:empty?).join(" | ")

        Transaction.new(date, description, amount, currency)
      end

      # Converts "dd/mm/yyyy" into a Date.
      def parse_date(value)
        day, month, year = value.split("/").map(&:to_i)
        Date.new(year, month, day)
      end

      # Converts an amount such as "1.234,56" into the Float 1234.56.
      def parse_amount(value)
        value.delete(".").sub(",", ".").to_f
      end
    end
  end
end
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "extras_de_cont/transaction"
5
+
6
module ExtrasDeCont
  module Rules
    # Rule that turns the extracted text of an ING account statement into
    # Transaction objects.
    #
    # The text is processed line by line. A transaction is anchored on a
    # "date line" (a line starting with a date such as "12 martie 2024");
    # surrounding lines are collected as extra description. The sign of the
    # amount is decided by comparing the amount's column with the positions
    # of the "Debit" and "Credit" labels in the most recent table header.
    class Ing < Rules::Base
      # Romanian month names mapped to their month number.
      ROMANIAN_MONTHS = {
        "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
        "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
        "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
      }.freeze

      RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze
      # "<day> <Romanian month name> <year>", month name matched
      # case-insensitively. Captures: 1 = day, 2 = month name, 3 = year.
      DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i
      # A transaction line starts with such a date.
      DATE_PREFIX = /\A\s*#{DATE_PATTERN}/

      # Header row of a transaction table; it provides the column positions.
      TABLE_HEADER_PATTERN = /Data\s+Detalii tranzactie\s+Debit\s+Credit/

      # Amounts use "." as thousands separator and "," as decimal separator.
      AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/

      # Lines matching any of these patterns are not part of a transaction.
      # Meeting one also ends the transaction that is currently being
      # collected (see #parse).
      #
      # NOTE(review): the last pattern is not anchored at the start, so any
      # line ending in "<digits>/<digits>" is treated as noise, including a
      # description line that ends that way. Confirm this is intended.
      NOISE_PATTERNS = [
        /\AExtras de cont\z/,
        /\APentru perioada:/,
        /\AValabil fara semnatura/,
        /\AING Bank/,
        /\ASediul:/,
        /\ANr\. inregistrare/,
        /\ACIF:/,
        /\ATitular cont:/,
        /\ACNP:/,
        /\AStr\. /,
        /\ATip cont:/,
        /\ANumar cont:/,
        /\AMoneda:/,
        /\A\d{6},/,
        /\ARoxana Petria/,
        /\AAlexandra Ilie/,
        /\AȘef Serviciu/,
        /\ASef Serviciu/,
        /\ASucursala/,
        /\AÎN/,
        /\AInformatii despre/,
        /\Ape www\./,
        /\d+\/\d+$/
      ].freeze

      # Extracts all transactions from the statement text.
      #
      # @param text [String] text of the statement, one statement line per line
      # @return [Array] transactions in the order they appear; each is built
      #   with Transaction.new(date, description, amount, currency), where the
      #   amount is a Float that is negative for debits and the currency is
      #   the last token of the most recent "Moneda:" line (nil if none was
      #   seen)
      def parse(text)
        transactions = []
        current_currency = nil
        current_table = nil
        above_lines = []
        below_lines = []
        date_line = nil

        each_normalized_line(text) do |line|
          if line.start_with?("Moneda:")
            current_currency = line.split.last
            next
          end

          # A table header or a noise line closes the pending transaction and
          # discards everything collected so far. The header is tested first,
          # and the flush still uses the previous table's column positions.
          header = table_header?(line)
          if header || noise?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            current_table = extract_column_positions(line) if header
            above_lines = []
            below_lines = []
            date_line = nil
            next
          end

          # Ignore everything until the first table header has been seen.
          next if current_table.nil?

          if date_line?(line)
            try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
            # The lines collected above the previous date line belong to the
            # transaction that was just flushed; drop them so they are not
            # repeated in the description of every following transaction.
            above_lines = [] unless date_line.nil?
            date_line = line
            below_lines = []
            next
          end

          (date_line ? below_lines : above_lines) << line
        end

        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        transactions
      end

      private

      # Yields every non-blank line with non-breaking spaces replaced by
      # regular spaces and surrounding whitespace removed.
      def each_normalized_line(text)
        text.each_line do |line|
          normalized = line.tr("\u00A0", " ").strip
          next if normalized.empty?

          yield normalized
        end
      end

      # True when the line is the header row of a transaction table.
      def table_header?(line)
        line.match?(TABLE_HEADER_PATTERN)
      end

      # True when the line starts with a Romanian-format date.
      def date_line?(line)
        line.match?(DATE_PREFIX)
      end

      # True when the line matches one of NOISE_PATTERNS.
      def noise?(line)
        NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
      end

      # Character offsets of the "Debit" and "Credit" labels in a header line.
      def extract_column_positions(line)
        {
          debit: line.index("Debit"),
          credit: line.index("Credit")
        }
      end

      # Appends the pending transaction to +transactions+, if there is one
      # and it can be built. Does nothing without a date line or a table.
      def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
        return if date_line.nil? || table.nil?

        transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
        transactions << transaction if transaction
      end

      # Builds a transaction from a date line and its surrounding lines.
      #
      # The last amount on the date line is the transaction amount; the text
      # between the date and that amount is the main description. Returns nil
      # when the line has no leading date or no amount.
      def build_transaction(date_line, above_lines, below_lines, table, currency)
        date_match = date_line.match(DATE_PREFIX)
        return if date_match.nil?

        # Collect MatchData (not just strings) so the column of the amount is
        # available for the debit/credit decision.
        amounts = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
        return if amounts.empty?

        transaction_amount_match = amounts.last
        amount_column = transaction_amount_match.begin(0)
        main_description = date_line[date_match.end(0)...amount_column].to_s.strip

        # An amount starting left of the point halfway between the "Debit"
        # and "Credit" labels is a debit and is stored as a negative number.
        midpoint = (table[:debit] + table[:credit]) / 2
        amount = parse_amount(transaction_amount_match[0])
        amount = -amount if amount_column < midpoint

        Transaction.new(
          parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
          build_description(main_description, above_lines, below_lines),
          amount,
          currency
        )
      end

      # Builds a Date from a day, a Romanian month name (any case) and a year.
      def parse_date(day, month_name, year)
        month = ROMANIAN_MONTHS[month_name.downcase]
        Date.new(year, month, day)
      end

      # Converts an amount such as "1.234,56" into the Float 1234.56.
      def parse_amount(value)
        value.delete(".").sub(",", ".").to_f
      end

      # Joins the non-empty description parts with " | ", in the order:
      # lines above the date line, main description, lines below.
      def build_description(main_desc, above_lines, below_lines)
        parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
        parts.reject(&:empty?).join(" | ")
      end
    end
  end
end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require "extras_de_cont/parser"
4
4
  require "extras_de_cont/rules/base"
5
+ require "extras_de_cont/rules/brd"
6
+ require "extras_de_cont/rules/ing"
5
7
  require "extras_de_cont/rules/revolut"
6
8
  require "extras_de_cont/rules/unicredit"
7
9
 
@@ -9,8 +11,10 @@ require "extras_de_cont/rules/unicredit"
9
11
  module ExtrasDeCont
10
12
  # Map of supported banks (symbol → rule class)
11
13
  BANK_RULES = {
12
- unicredit: Rules::UniCredit,
13
- revolut: Rules::Revolut
14
+ brd: Rules::Brd,
15
+ ing: Rules::Ing,
16
+ revolut: Rules::Revolut,
17
+ unicredit: Rules::UniCredit
14
18
  }.freeze
15
19
 
16
20
  class << self
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extras_de_cont
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Nutiu
@@ -39,6 +39,8 @@ files:
39
39
  - lib/extras_de_cont.rb
40
40
  - lib/extras_de_cont/parser.rb
41
41
  - lib/extras_de_cont/rules/base.rb
42
+ - lib/extras_de_cont/rules/brd.rb
43
+ - lib/extras_de_cont/rules/ing.rb
42
44
  - lib/extras_de_cont/rules/revolut.rb
43
45
  - lib/extras_de_cont/rules/unicredit.rb
44
46
  - lib/extras_de_cont/transaction.rb