extras_de_cont 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6dde609d769123f6ba26d34fc0efa79b82ad91b18f6f84d725362480f6c9ef3a
4
- data.tar.gz: 236b56eb1c323f3dafc2278135424f5cd51edb336fdd45462c0c73a1e1db83af
3
+ metadata.gz: f9393dd1d6d9cec4a77e34393af8a0ccb3459c0486835f7df0eed74410cff435
4
+ data.tar.gz: f5008d24c8f24b1f2b7a0d0bda8a675a2c886fba09e86b9d91bb888c7a1d7b92
5
5
  SHA512:
6
- metadata.gz: 48a679a6514c868d895d68ba56ed821e4528b7b48d9acb90638f16a5d723b2736e7e67afd6fdd85481ec1c9980318e16bc823091f564e6bc1963c554bc505e6c
7
- data.tar.gz: a31df94d256b541092714e9d54351cd785e46dd0e58120c593699733cb9f90530548bcce9040d5cdfa36c421d775c80335f95a19f24711918549e454c1fee147
6
+ metadata.gz: 07e61564409929f0c316b834ac18ad00d54a4a0a9f6b1f221c012e3064f5ad046de1324781545455f6417fb4198450b49773613e1d59b936a8410e25286238e3
7
+ data.tar.gz: 64cce605262cdcb5a31aa164b76e6f108f8b24149f136c92d237b888dcd894813cda0fce0bf8e801862e162ecfbd88e095cc479c61661f2bab9054bb822b08a0
data/README.md CHANGED
@@ -38,11 +38,12 @@ ruby -Ilib:test test/revolut_rule_test.rb
38
38
 
39
39
  ## Supported Banks
40
40
 
41
- | Bank | Symbol | Currencies | Features |
42
- |---|---|---|---|
43
- | Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
41
+ | Bank | Symbol | Currencies | Features |
42
+ |-----------|--------------|---|--------------------------------------------------------|
43
+ | Revolut | `:revolut` | RON, EUR, USD, GBP, PLN, CZK, HUF, BGN, TRY, UAH | Personal & Business, multi-section, symbol currencies |
44
44
  | UniCredit | `:unicredit` | RON, EUR | Romanian month names, page breaks, transaction markers |
45
- | BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
45
+ | BRD | `:brd` | RON, EUR | Below-line amounts, Romanian number format |
46
+ | ING | `:ing` | RON | Normal bank statements in RON |
46
47
 
47
48
  ## Development
48
49
 
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "extras_de_cont"
5
- s.version = "1.2.0"
5
+ s.version = "1.4.0"
6
6
  s.licenses = ["GPLv3"]
7
7
  s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
8
8
  s.description = <<~TEXT
@@ -25,4 +25,5 @@ Gem::Specification.new do |s|
25
25
  s.require_paths = ["lib"]
26
26
 
27
27
  s.add_dependency "pdf-reader", "~> 2.15"
28
+ s.add_dependency "zeitwerk", "~> 2.8"
28
29
  end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "date"
4
- require "extras_de_cont/transaction"
5
4
 
6
5
  module ExtrasDeCont
7
6
  module Rules
@@ -0,0 +1,173 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+
5
+ module ExtrasDeCont
6
+ module Rules
7
# Parsing rule for ING statement text.
#
# The rule walks the statement line by line. A line matching
# TABLE_HEADER_PATTERN opens a transaction table and records where the
# "Debit" and "Credit" column titles start. Inside a table, a line that
# begins with a Romanian date ("12 ianuarie 2024 ...") is the main line of a
# transaction; lines before the first date line of a table and lines after a
# date line are folded into that transaction's description.
class Ing < Rules::Base
  # Lower-case Romanian month names mapped to their month number.
  ROMANIAN_MONTHS = {
    "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
    "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
    "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
  }.freeze

  RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze
  # "<day> <month name> <year>", case-insensitive; captures day, month, year.
  DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i
  # Same date, but only when it opens the line.
  DATE_PREFIX = /\A\s*#{DATE_PATTERN}/

  TABLE_HEADER_PATTERN = /Data\s+Detalii tranzactie\s+Debit\s+Credit/

  # Romanian number format: "." groups thousands, "," is the decimal mark.
  AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/

  # Lines matching any of these are dropped and also terminate the
  # transaction currently being collected.
  # NOTE(review): the two personal names are hard-coded signatories and the
  # last pattern is unanchored at the start, so it matches ANY line ending in
  # "<digits>/<digits>" - presumably meant for page counters; confirm against
  # real statements.
  NOISE_PATTERNS = [
    /\AExtras de cont\z/,
    /\APentru perioada:/,
    /\AValabil fara semnatura/,
    /\AING Bank/,
    /\ASediul:/,
    /\ANr\. inregistrare/,
    /\ACIF:/,
    /\ATitular cont:/,
    /\ACNP:/,
    /\AStr\. /,
    /\ATip cont:/,
    /\ANumar cont:/,
    /\AMoneda:/,
    /\A\d{6},/,
    /\ARoxana Petria/,
    /\AAlexandra Ilie/,
    /\AȘef Serviciu/,
    /\ASef Serviciu/,
    /\ASucursala/,
    /\AÎN/,
    /\AInformatii despre/,
    /\Ape www\./,
    /\d+\/\d+$/
  ].freeze

  # Extracts transactions from the text of a statement.
  #
  # @param text [String] statement text, one visual row per line
  # @return [Array] the objects built by Transaction.new, in the order their
  #   date lines appear in the text
  def parse(text)
    transactions = []
    current_currency = nil
    current_table = nil
    above_lines = []
    below_lines = []
    date_line = nil

    each_normalized_line(text) do |line|
      # Checked before noise?, which also matches "Moneda:", so the currency
      # is captured rather than discarded.
      if line.start_with?("Moneda:")
        current_currency = line.split.last
        next
      end

      if table_header?(line)
        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        current_table = extract_column_positions(line)
        above_lines, below_lines, date_line = [], [], nil
        next
      end

      if noise?(line)
        try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
        above_lines, below_lines, date_line = [], [], nil
        next
      end

      # Nothing is collected until the first table header has been seen.
      next if current_table.nil?

      if date_line?(line)
        if date_line
          try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
          # The lines gathered above the previous date line belonged to the
          # transaction that was just flushed; without this reset they would
          # be prepended to every following transaction of the table.
          above_lines = []
        end
        date_line = line
        below_lines = []
        next
      end

      if date_line
        below_lines << line
      else
        above_lines << line
      end
    end

    try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
    transactions
  end

  private

  # Yields every non-blank line with non-breaking spaces turned into plain
  # spaces and surrounding whitespace removed. Because of the strip, all
  # character offsets used later are relative to the first visible character
  # of the line.
  def each_normalized_line(text)
    text.each_line do |line|
      normalized = line.tr("\u00A0", " ").strip
      next if normalized.empty?
      yield normalized
    end
  end

  # True when the line is the column-title row of a transaction table.
  def table_header?(line)
    line.match?(TABLE_HEADER_PATTERN)
  end

  # True when the line starts with a Romanian-format date.
  def date_line?(line)
    line.match?(DATE_PREFIX)
  end

  # True when the line matches any entry of NOISE_PATTERNS.
  def noise?(line)
    NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
  end

  # Records the character offsets at which the "Debit" and "Credit" titles
  # start in a header line.
  def extract_column_positions(line)
    {
      debit: line.index("Debit"),
      credit: line.index("Credit")
    }
  end

  # Appends the pending transaction to +transactions+, if there is one.
  # Does nothing when no date line or no table has been seen, or when
  # build_transaction returns nil.
  def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
    return if date_line.nil? || table.nil?

    transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
    transactions << transaction if transaction
  end

  # Builds one transaction from its date line and surrounding detail lines.
  #
  # The LAST amount on the date line is the transaction amount; the text
  # between the date and that amount is the main description. The amount is
  # negated when it starts left of the midpoint between the "Debit" and
  # "Credit" header offsets.
  #
  # @return [Object, nil] result of Transaction.new, or nil when the line has
  #   no leading date or no amount
  def build_transaction(date_line, above_lines, below_lines, table, currency)
    date_match = date_line.match(DATE_PREFIX)
    return if date_match.nil?

    # scan alone returns strings; enumerating it this way keeps each
    # MatchData so the amount's offset is available.
    amounts = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
    return if amounts.empty?

    transaction_amount_match = amounts.last
    description_start = date_match.end(0)
    description_end = transaction_amount_match.begin(0)
    main_description = date_line[description_start...description_end].to_s.strip

    amount_string = transaction_amount_match[0]
    amount = parse_amount(amount_string)
    midpoint = (table[:debit] + table[:credit]) / 2
    amount = -amount if transaction_amount_match.begin(0) < midpoint

    description = build_description(main_description, above_lines, below_lines)

    Transaction.new(
      parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
      description,
      amount,
      currency
    )
  end

  # Converts day / Romanian month name / year into a Date. The month name is
  # looked up case-insensitively in ROMANIAN_MONTHS.
  def parse_date(day, month_name, year)
    month = ROMANIAN_MONTHS[month_name.downcase]
    Date.new(year, month, day)
  end

  # Converts a Romanian-formatted amount such as "1.234,56" to a Float
  # (1234.56).
  def parse_amount(value)
    value.delete(".").sub(",", ".").to_f
  end

  # Joins the detail lines above, the main description and the detail lines
  # below with " | ", skipping empty parts.
  def build_description(main_desc, above_lines, below_lines)
    parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
    parts.reject(&:empty?).join(" | ")
  end
end
172
+ end
173
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "date"
4
- require "extras_de_cont/transaction"
5
4
 
6
5
  module ExtrasDeCont
7
6
  module Rules
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "date"
4
- require "extras_de_cont/transaction"
5
4
 
6
5
  module ExtrasDeCont
7
6
  module Rules
@@ -1,18 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "extras_de_cont/parser"
4
- require "extras_de_cont/rules/base"
5
- require "extras_de_cont/rules/brd"
6
- require "extras_de_cont/rules/revolut"
7
- require "extras_de_cont/rules/unicredit"
3
+
4
+ require 'zeitwerk'
5
+ loader = Zeitwerk::Loader.for_gem
6
+ loader.inflector.inflect(
7
+ "unicredit" => "UniCredit"
8
+ )
9
+ loader.setup # ready!
10
+
11
+
8
12
 
9
13
  # The ExtrasDeCont module contains utilities for parsing bank statements.
10
14
  module ExtrasDeCont
11
15
  # Map of supported banks (symbol → rule class)
12
16
  BANK_RULES = {
13
- brd: Rules::Brd,
14
- revolut: Rules::Revolut,
15
- unicredit: Rules::UniCredit
17
+ brd: ExtrasDeCont::Rules::Brd,
18
+ ing: ExtrasDeCont::Rules::Ing,
19
+ revolut: ExtrasDeCont::Rules::Revolut,
20
+ unicredit: ExtrasDeCont::Rules::UniCredit
16
21
  }.freeze
17
22
 
18
23
  class << self
@@ -26,8 +31,10 @@ module ExtrasDeCont
26
31
  rule_class = BANK_RULES[bank]
27
32
  raise ArgumentError, "Unsupported bank: #{bank}. Supported banks: #{BANK_RULES.keys.join(", ")}" unless rule_class
28
33
 
29
- p = Parser.new(file)
34
+ p = ExtrasDeCont::Parser.new(file)
30
35
  p.parse_with(rule_class.new)
31
36
  end
32
37
  end
33
38
  end
39
+
40
+ loader.eager_load # optionally
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extras_de_cont
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Nutiu
@@ -23,6 +23,20 @@ dependencies:
23
23
  - - "~>"
24
24
  - !ruby/object:Gem::Version
25
25
  version: '2.15'
26
+ - !ruby/object:Gem::Dependency
27
+ name: zeitwerk
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.8'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.8'
26
40
  description: |
27
41
  A simple library which helps you extract transactions from a PDF bank statement.
28
42
  Fine tuned for Romanian bank statements.
@@ -40,6 +54,7 @@ files:
40
54
  - lib/extras_de_cont/parser.rb
41
55
  - lib/extras_de_cont/rules/base.rb
42
56
  - lib/extras_de_cont/rules/brd.rb
57
+ - lib/extras_de_cont/rules/ing.rb
43
58
  - lib/extras_de_cont/rules/revolut.rb
44
59
  - lib/extras_de_cont/rules/unicredit.rb
45
60
  - lib/extras_de_cont/transaction.rb