extras_de_cont 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3adf9f273d4799d379dd3b863486098e0f74713c679abeea1c4d3bc9c19c84f3
4
- data.tar.gz: a4f71318dfc8c9ee3a604b776bc0975b3af69d8487cdddef78c25a754193fcda
3
+ metadata.gz: '029f7b904cb76faa331a686ec02f81a0fad9ffce39e0bea1c05e0ed95a6bb34c'
4
+ data.tar.gz: c98d9dc06a00db1cface2c7ea7b58f563502c2fa58da4481d2a928e99fe08766
5
5
  SHA512:
6
- metadata.gz: e6b6afc9476cdf5dfb67f1dbb6e7cf8404da6757329cc3435a9bb42d0f064a4fa0a7df98a30003ed4fd821724551933076a88a75dd156e4c2818989a73aa0487
7
- data.tar.gz: adb65db1676e5181ae680a45ab1c0c0b56ed6a3d8622b591fe5fa5cba4d670ed00e990318d72919664194e6c5d83630882561294cf1bfa65b42b2c77a92875a7
6
+ metadata.gz: cd20be09910a3ca8ad2a13bc07ce619d76950e080e3bb927a2f3dee0f24441f173ad842437dfc9f8196b24579d5976519c5558031aa8cd33e93cd5263e229705
7
+ data.tar.gz: 27c5cda6efa0043af432ac596c7d2ff736dd3f26c520f3cd2ea79a45506eb41a48d5c9ca236432d1f9999e73f9d8c11dabc201e4ee23cab82f08e9bc7a351b6b
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = "extras_de_cont"
5
- s.version = "1.0.2"
5
+ s.version = "1.1.0"
6
6
  s.licenses = ["GPLv3"]
7
7
  s.summary = "A simple library which helps you extract transactions from a PDF bank statement."
8
8
  s.description = <<~TEXT
@@ -11,24 +11,45 @@ module ExtrasDeCont
11
11
  "Pending from ",
12
12
  "Account transactions from ",
13
13
  "Reverted from ",
14
- "Deposit transactions from "
14
+ "Deposit transactions from ",
15
+ "Transactions from "
15
16
  ].freeze
16
17
 
17
18
  DOCUMENT_NOISE_HEADERS = [
19
+ "Account statement",
18
20
  "Balance summary",
19
21
  "The balance on your statement might differ",
22
+ "There were no transactions during this period",
23
+ "Transaction types",
24
+ "Your funds are held and protected by a licensed bank",
20
25
  "Report lost or stolen card",
21
26
  "+",
22
27
  "Get help directly in app",
28
+ "Get help directly In app",
23
29
  "Scan the QR code",
24
30
  "RON Statement",
31
+ " Statement",
25
32
  "Generated on the ",
26
33
  "Revolut Bank UAB",
27
34
  "© "
28
35
  ].freeze
29
36
 
30
- DATE_PREFIX = /\A(?<date>[A-Z][a-z]{2} \d{1,2}, \d{4})\b/
31
- AMOUNT = /-?\d[\d,]*\.\d{2} [A-Z]{3}/
37
+ CURRENCY_SYMBOLS = {
38
+ "$" => "USD",
39
+ "€" => "EUR",
40
+ "£" => "GBP",
41
+ "zł" => "PLN",
42
+ "Kč" => "CZK",
43
+ "Ft" => "HUF",
44
+ "лв" => "BGN",
45
+ "₺" => "TRY",
46
+ "₴" => "UAH"
47
+ }.freeze
48
+ DATE_FORMATS = ["%b %e, %Y", "%e %b %Y"].freeze
49
+ DATE_PREFIX = /\A(?<date>(?:[A-Z][a-z]{2} \d{1,2}, \d{4}|\d{1,2} [A-Z][a-z]{2} \d{4}))\b/
50
+ NUMBER = /-?(?:\d{1,3}(?:[ ,]\d{3})+|\d+)\.\d{2}/
51
+ CURRENCY_SYMBOL = Regexp.union(CURRENCY_SYMBOLS.keys.sort_by { |symbol| -symbol.length })
52
+ AMOUNT = /(?:#{NUMBER} [A-Z]{3}|#{CURRENCY_SYMBOL}#{NUMBER}|#{NUMBER} ?#{CURRENCY_SYMBOL})/
32
53
 
33
54
  def parse(text)
34
55
  transactions = []
@@ -105,16 +126,41 @@ module ExtrasDeCont
105
126
  parse_date(match[:date]),
106
127
  description,
107
128
  amount,
108
- amount_string.split.last
129
+ parse_currency(amount_string)
109
130
  )
110
131
  end
111
132
 
112
133
  def parse_date(value)
113
- Date.strptime(value, "%b %e, %Y")
134
+ DATE_FORMATS.each do |format|
135
+ return Date.strptime(value, format)
136
+ rescue Date::Error
137
+ next
138
+ end
114
139
  end
115
140
 
116
141
  def parse_amount(value)
117
- value.split.first.delete(",").to_f
142
+ numeric_value(value).delete(", ").to_f
143
+ end
144
+
145
+ def parse_currency(value)
146
+ symbol = currency_symbol(value)
147
+ return CURRENCY_SYMBOLS.fetch(symbol) if symbol
148
+
149
+ value.split.last
150
+ end
151
+
152
+ def numeric_value(value)
153
+ symbol = currency_symbol(value)
154
+ return value.delete_prefix(symbol) if symbol && value.start_with?(symbol)
155
+ return value.delete_suffix(symbol).strip if symbol
156
+
157
+ value.sub(/\s+[A-Z]{3}\z/, "")
158
+ end
159
+
160
+ def currency_symbol(value)
161
+ CURRENCY_SYMBOLS.keys.find do |symbol|
162
+ value.start_with?(symbol) || value.end_with?(symbol)
163
+ end
118
164
  end
119
165
 
120
166
  def section_header?(line)
@@ -147,11 +193,12 @@ module ExtrasDeCont
147
193
  amount_matches = row.to_enum(:scan, AMOUNT).map { Regexp.last_match }
148
194
  return if amount_matches.empty?
149
195
 
150
- transaction_match = if table.fetch(:has_balance)
151
- amount_matches[-2] if amount_matches.length > 1
152
- else
153
- amount_matches[-1]
154
- end
196
+ transaction_match =
197
+ if table.fetch(:has_balance)
198
+ amount_matches[-2] if amount_matches.length > 1
199
+ else
200
+ amount_matches[-1]
201
+ end
155
202
  return if transaction_match.nil?
156
203
 
157
204
  description = row[date_match.end(0)...transaction_match.begin(0)].to_s.strip
@@ -1,8 +1,205 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "date"
4
+ require "extras_de_cont/transaction"
5
+
3
6
  module ExtrasDeCont
4
7
  module Rules
5
8
  class UniCredit < Rules::Base
9
+ ROMANIAN_MONTHS = {
10
+ "ianuarie" => 1, "februarie" => 2, "martie" => 3, "aprilie" => 4,
11
+ "mai" => 5, "iunie" => 6, "iulie" => 7, "august" => 8,
12
+ "septembrie" => 9, "octombrie" => 10, "noiembrie" => 11, "decembrie" => 12
13
+ }.freeze
14
+
15
+ RO_MONTH_NAMES = ROMANIAN_MONTHS.keys.freeze
16
+ DATE_PATTERN = /\b(\d{1,2})\s+(#{RO_MONTH_NAMES.join("|")})\s+(\d{4})\b/i
17
+ DATE_PREFIX = /\A\s*#{DATE_PATTERN}/
18
+
19
+ TABLE_HEADER_PATTERN = /Data\s+Descriere\s+Debit\s+Credit\s+Sold/
20
+
21
+ SECTION_HEADERS = [
22
+ "TRANZACȚII",
23
+ "SUMAR CONT",
24
+ "EXTRAS DE CONT"
25
+ ].freeze
26
+
27
+ NOISE_PATTERNS = [
28
+ /\AUniCredit Bank S\.A\./,
29
+ /\ABulevardul/,
30
+ /\ASector \d/,
31
+ /\ATel:/,
32
+ /\AEmail:/,
33
+ /\Aunicredit\.ro/,
34
+ /\ACapital social:/,
35
+ /\APrezentul extras/,
36
+ /\AFondurile disponibile/,
37
+ /\APentru mai multe/,
38
+ /\ANUME CLIENT:/,
39
+ /\AADRESA:/,
40
+ /\ASUCURSALA:/,
41
+ /\ADATA EXTRAS CONT/,
42
+ /\APERIOADA/,
43
+ /\ATIP CONT:/,
44
+ /\AIBAN:/,
45
+ /\AMONEDA:/,
46
+ /\AOperator de date/,
47
+ /\ASold inițial/,
48
+ /\ASold final/,
49
+ /\AOperator de date cu/
50
+ ].freeze
51
+
52
+ NEW_TRANSACTION_MARKERS = [
53
+ /\A\+CMS CLT-/,
54
+ /\A\+GPP/,
55
+ /\APlata electronica/,
56
+ /\APlata Instant/,
57
+ /\AIncasare Instant/,
58
+ /\ATransfer electronic/
59
+ ].freeze
60
+
61
+ AMOUNT_PATTERN = /\d{1,3}(?:[.,]\d{3})*\.\d{2}/
62
+ CURRENCY_FROM_HEADER = /Sold\(([A-Z]{3})\)/
63
+
64
+ def parse(text)
65
+ transactions = []
66
+ current_currency = nil
67
+ current_table = nil
68
+ above_lines = []
69
+ below_lines = []
70
+ date_line = nil
71
+
72
+ each_normalized_line(text) do |line|
73
+ if (m = line.match(CURRENCY_FROM_HEADER))
74
+ current_currency = m[1]
75
+ end
76
+
77
+ if table_header?(line)
78
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
79
+ current_table = extract_column_positions(line)
80
+ above_lines, below_lines, date_line = [], [], nil
81
+ next
82
+ end
83
+
84
+ if noise?(line) || section_header?(line)
85
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
86
+ above_lines, below_lines, date_line = [], [], nil
87
+ next
88
+ end
89
+
90
+ next if current_table.nil?
91
+
92
+ if date_line?(line)
93
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
94
+ date_line = line
95
+ below_lines = []
96
+ next
97
+ end
98
+
99
+ if date_line
100
+ if new_transaction_marker?(line)
101
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
102
+ date_line, below_lines = nil, []
103
+ above_lines = [line]
104
+ else
105
+ below_lines << line
106
+ end
107
+ else
108
+ above_lines << line
109
+ end
110
+ end
111
+
112
+ try_flush(date_line, above_lines, below_lines, current_table, current_currency, transactions)
113
+ transactions
114
+ end
115
+
116
+ private
117
+
118
+ def each_normalized_line(text)
119
+ text.each_line do |line|
120
+ normalized = line.tr("\u00A0", " ").strip
121
+ next if normalized.empty?
122
+
123
+ yield normalized
124
+ end
125
+ end
126
+
127
+ def table_header?(line)
128
+ line.match?(TABLE_HEADER_PATTERN)
129
+ end
130
+
131
+ def date_line?(line)
132
+ line.match?(DATE_PREFIX)
133
+ end
134
+
135
+ def noise?(line)
136
+ NOISE_PATTERNS.any? { |pattern| line.match?(pattern) }
137
+ end
138
+
139
+ def section_header?(line)
140
+ SECTION_HEADERS.any? { |header| line == header }
141
+ end
142
+
143
+ def new_transaction_marker?(line)
144
+ NEW_TRANSACTION_MARKERS.any? { |pattern| line.match?(pattern) }
145
+ end
146
+
147
+ def extract_column_positions(line)
148
+ {
149
+ debit: line.index("Debit"),
150
+ credit: line.index("Credit"),
151
+ sold: line.index("Sold")
152
+ }
153
+ end
154
+
155
+ def try_flush(date_line, above_lines, below_lines, table, currency, transactions)
156
+ return if date_line.nil? || table.nil?
157
+
158
+ transaction = build_transaction(date_line, above_lines, below_lines, table, currency)
159
+ transactions << transaction if transaction
160
+ end
161
+
162
+ def build_transaction(date_line, above_lines, below_lines, table, currency)
163
+ date_match = date_line.match(DATE_PREFIX)
164
+ return if date_match.nil?
165
+
166
+ amounts = date_line.to_enum(:scan, AMOUNT_PATTERN).map { Regexp.last_match }
167
+ return if amounts.size < 2
168
+
169
+ transaction_amount_match = amounts[-2]
170
+ description_start = date_match.end(0)
171
+ description_end = transaction_amount_match.begin(0)
172
+ main_description = date_line[description_start...description_end].to_s.strip
173
+
174
+ amount_string = transaction_amount_match[0]
175
+ amount = amount_string.delete(", ").to_f
176
+ midpoint = (table[:debit] + table[:credit]) / 2
177
+ amount = -amount if transaction_amount_match.begin(0) < midpoint
178
+
179
+ description = build_description(main_description, above_lines, below_lines)
180
+
181
+ Transaction.new(
182
+ parse_date(date_match[1].to_i, date_match[2], date_match[3].to_i),
183
+ description,
184
+ amount,
185
+ currency || extract_currency_from_header(date_line)
186
+ )
187
+ end
188
+
189
+ def parse_date(day, month_name, year)
190
+ month = ROMANIAN_MONTHS[month_name.downcase]
191
+ Date.new(year, month, day)
192
+ end
193
+
194
+ def build_description(main_desc, above_lines, below_lines)
195
+ parts = [*above_lines.map(&:strip), main_desc, *below_lines.map(&:strip)]
196
+ parts.reject(&:empty?).join(" | ")
197
+ end
198
+
199
+ def extract_currency_from_header(date_line)
200
+ m = date_line.match(CURRENCY_FROM_HEADER)
201
+ m ? m[1] : nil
202
+ end
6
203
  end
7
204
  end
8
205
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extras_de_cont
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Denis Nutiu