extras_de_cont 1.2.0 → 1.3.0

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6dde609d769123f6ba26d34fc0efa79b82ad91b18f6f84d725362480f6c9ef3a
4
- data.tar.gz: 236b56eb1c323f3dafc2278135424f5cd51edb336fdd45462c0c73a1e1db83af
3
+ metadata.gz: df78cdd18c6c97610fd739d7154922d9d5cafe9d8460c54104429698d93d0479
4
+ data.tar.gz: 8ea76ba23c3c32f5a4d39c7610abdb3dbef95a8c96cb831592316c81165bd610
5
5
  SHA512:
6
- metadata.gz: 48a679a6514c868d895d68ba56ed821e4528b7b48d9acb90638f16a5d723b2736e7e67afd6fdd85481ec1c9980318e16bc823091f564e6bc1963c554bc505e6c
7
- data.tar.gz: a31df94d256b541092714e9d54351cd785e46dd0e58120c593699733cb9f90530548bcce9040d5cdfa36c421d775c80335f95a19f24711918549e454c1fee147
6
+ metadata.gz: ccfbbd9f393fe5de67d966edbdfae2b50e2cc23a40d8ec5def3e532176648e5243b08e22a7387423ecce688c81e0e0f9dd78a5b493678f620b079ee0e63b5ab5
7
+ data.tar.gz: 44b5c55785d2b93c0c85797826a90fc56733c9fa2dc60e494e61ee2e3041857498d32603cf410ac9c70998f463971b72134f4a9d8cd986374249713c3fe8a124
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "extras_de_cont"
5
- s.version = "1.2.0"
5
+ s.version = "1.3.0"
6
6
  s.licenses = ["GPLv3"]
7
7
  s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
8
8
  s.description = <<~TEXT
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "date"
4
+ require "extras_de_cont/transaction"
5
+
6
+ module ExtrasDeCont
7
+ module Rules
8
+ class Ing < Rules::Base
9
+ ROMANIAN_MONTHS = {
10
+ "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
11
+ "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
12
+ "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
13
+ }.freeze
14
+
15
+ RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze
16
+ DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i
17
+ DATE_PREFIX = /\A\s*#{DATE_PATTERN}/
18
+
19
+ TABLE_HEADER_PATTERN = /Data\s+Detalii tranzactie\s+Debit\s+Credit/
20
+
21
+ AMOUNT_PATTERN = /\d{1,3}(?:\.\d{3})*,\d{2}/
22
+
23
+ NOISE_PATTERNS = [
24
+ /\AExtras de cont\z/,
25
+ /\APentru perioada:/,
26
+ /\AValabil fara semnatura/,
27
+ /\AING Bank/,
28
+ /\ASediul:/,
29
+ /\ANr\. inregistrare/,
30
+ /\ACIF:/,
31
+ /\ATitular cont:/,
32
+ /\ACNP:/,
33
+ /\AStr\. /,
34
+ /\ATip cont:/,
35
+ /\ANumar cont:/,
36
+ /\AMoneda:/,
37
+ /\A\d{6},/,
38
+ /\ARoxana Petria/,
39
+ /\AAlexandra Ilie/,
40
+ /\AȘef Serviciu/,
41
+ /\ASef Serviciu/,
42
+ /\ASucursala/,
43
+ /\AÎN/,
44
+ /\AInformatii despre/,
45
+ /\Ape www\./,
46
+ /\d+\/\d+$/
47
+ ].freeze
48
+
49
+ def parse(text)
50
+ transactions = []
51
+ current_currency = nil
52
+ current_table = nil
53
+ above_lines = []
54
+ below_lines = []
55
+ date_line = nil
56
+
57
+ each_normalized_line(text) do |line|
58
+ if line.start_with?("Moneda:")
59
+ current_currency = line.split.last
60
+ next
61
+ end
62
+
63
+ if table_header?(line)
64
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
65
+ current_table = extract_column_positions(line)
66
+ above_lines, below_lines, date_line = [], [], nil
67
+ next
68
+ end
69
+
70
+ if noise?(line)
71
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
72
+ above_lines, below_lines, date_line = [], [], nil
73
+ next
74
+ end
75
+
76
+ next if current_table.nil?
77
+
78
+ if date_line?(line)
79
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
80
+ date_line = line
81
+ below_lines = []
82
+ next
83
+ end
84
+
85
+ if date_line
86
+ below_lines << line
87
+ else
88
+ above_lines << line
89
+ end
90
+ end
91
+
92
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
93
+ transactions
94
+ end
95
+
96
+ private
97
+
98
+ def each_normalized_line(text)
99
+ text.each_line do |line|
100
+ normalized = line.tr("\u00A0", " ").strip
101
+ next if normalized.empty?
102
+ yield normalized
103
+ end
104
+ end
105
+
106
+ def table_header?(line)
107
+ line.match?(TABLE_HEADER_PATTERN)
108
+ end
109
+
110
+ def date_line?(line)
111
+ line.match?(DATE_PREFIX)
112
+ end
113
+
114
+ def noise?(line)
115
+ NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
116
+ end
117
+
118
+ def extract_column_positions(line)
119
+ {
120
+ debit: line.index("Debit"),
121
+ credit: line.index("Credit")
122
+ }
123
+ end
124
+
125
+ def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
126
+ return if date_line.nil? || table.nil?
127
+
128
+ transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
129
+ transactions << transaction if transaction
130
+ end
131
+
132
+ def build_transaction(date_line, above_lines, below_lines, table, currency)
133
+ date_match = date_line.match(DATE_PREFIX)
134
+ return if date_match.nil?
135
+
136
+ amounts = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
137
+ return if amounts.empty?
138
+
139
+ transaction_amount_match = amounts.last
140
+ description_start = date_match.end(0)
141
+ description_end = transaction_amount_match.begin(0)
142
+ main_description = date_line[description_start...description_end].to_s.strip
143
+
144
+ amount_string = transaction_amount_match[0]
145
+ amount = parse_amount(amount_string)
146
+ midpoint = (table[:debit] + table[:credit]) / 2
147
+ amount = -amount if transaction_amount_match.begin(0) < midpoint
148
+
149
+ description = build_description(main_description, above_lines, below_lines)
150
+
151
+ Transaction.new(
152
+ parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
153
+ description,
154
+ amount,
155
+ currency
156
+ )
157
+ end
158
+
159
+ def parse_date(day, month_name, year)
160
+ month = ROMANIAN_MONTHS[month_name.downcase]
161
+ Date.new(year, month, day)
162
+ end
163
+
164
+ def parse_amount(value)
165
+ value.delete(".").sub(",", ".").to_f
166
+ end
167
+
168
+ def build_description(main_desc, above_lines, below_lines)
169
+ parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
170
+ parts.reject(&:empty?).join(" | ")
171
+ end
172
+ end
173
+ end
174
+ end
@@ -3,6 +3,7 @@
3
3
  require "extras_de_cont/parser"
4
4
  require "extras_de_cont/rules/base"
5
5
  require "extras_de_cont/rules/brd"
6
+ require "extras_de_cont/rules/ing"
6
7
  require "extras_de_cont/rules/revolut"
7
8
  require "extras_de_cont/rules/unicredit"
8
9
 
@@ -11,6 +12,7 @@ module ExtrasDeCont
11
12
  # Map of supported banks (symbol → rule class)
12
13
  BANK_RULES = {
13
14
  brd: Rules::Brd,
15
+ ing: Rules::Ing,
14
16
  revolut: Rules::Revolut,
15
17
  unicredit: Rules::UniCredit
16
18
  }.freeze
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extras_de_cont
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Nutiu
@@ -40,6 +40,7 @@ files:
40
40
  - lib/extras_de_cont/parser.rb
41
41
  - lib/extras_de_cont/rules/base.rb
42
42
  - lib/extras_de_cont/rules/brd.rb
43
+ - lib/extras_de_cont/rules/ing.rb
43
44
  - lib/extras_de_cont/rules/revolut.rb
44
45
  - lib/extras_de_cont/rules/unicredit.rb
45
46
  - lib/extras_de_cont/transaction.rb