reckon 0.3.8 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +31 -17
- data/bin/reckon +1 -1
- data/lib/reckon.rb +2 -0
- data/lib/reckon/app.rb +15 -219
- data/lib/reckon/csv_parser.rb +259 -0
- data/lib/reckon/money.rb +150 -0
- data/reckon.gemspec +2 -2
- data/spec/reckon/app_spec.rb +14 -270
- data/spec/reckon/csv_parser_spec.rb +393 -0
- data/spec/reckon/date_column_spec.rb +39 -0
- data/spec/reckon/money_column_spec.rb +52 -0
- data/spec/reckon/money_spec.rb +68 -0
- metadata +28 -35
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d685aa1ab6e16d39b1fbe75ff281f3f439d347c8
|
4
|
+
data.tar.gz: 0384cd69b6cc8d6a81a4530954ece1c16d2516f5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 27f48bc47502d7dba8aa5ef5cc47ba149a679a1340a192006c8707d3492bc1e0dcb35a9e85a375b9525b8d5f72e2476b0ffcf3b555fa72a6db7de744d2108061
|
7
|
+
data.tar.gz: e5224c1a2bc89492f3d3d299edf4f0d9d1d17e4df3dfbd5629dc039d4325275730dbeaf1f04e11b14e05b679e4bc3ea975643d45eb5af69d6220b2e3055759b6
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-
|
1
|
+
ruby-2.0.0-p353
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -32,23 +32,32 @@ Learn more:
|
|
32
32
|
|
33
33
|
> reckon -h
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
35
|
+
Usage: Reckon.rb [options]
|
36
|
+
|
37
|
+
-f, --file FILE The CSV file to parse
|
38
|
+
-a, --account name The Ledger Account this file is for
|
39
|
+
-v, --[no-]verbose Run verbosely
|
40
|
+
-i, --inverse Use the negative of each amount
|
41
|
+
-p, --print-table Print out the parsed CSV in table form
|
42
|
+
-o, --output-file FILE The ledger file to append to
|
43
|
+
-l, --learn-from FILE An existing ledger file to learn accounts from
|
44
|
+
--ignore-columns 1,2,5
|
45
|
+
Columns to ignore in the CSV file - the first column is column 1
|
46
|
+
--contains-header
|
47
|
+
The first row of the CSV is a header and should be skipped
|
48
|
+
--csv-separator ','
|
49
|
+
Separator for parsing the CSV - default is comma.
|
50
|
+
--comma-separates-cents
|
51
|
+
Use comma instead of period to delimit dollars from cents when parsing ($100,50 instead of $100.50)
|
52
|
+
--encoding
|
53
|
+
Specify an encoding for the CSV file
|
54
|
+
-c, --currency '$' Currency symbol to use, defaults to $ (£, EUR)
|
55
|
+
--date-format '%d/%m/%Y'
|
56
|
+
Force the date format (see Ruby DateTime strftime)
|
57
|
+
--suffixed
|
58
|
+
If --currency should be used as a suffix. Defaults to false.
|
59
|
+
-h, --help Show this message
|
60
|
+
--version Show version
|
52
61
|
|
53
62
|
If you find CSV files that it can't parse, send me examples or pull requests!
|
54
63
|
|
@@ -65,3 +74,8 @@ If you find CSV files that it can't parse, send me examples or pull requests!
|
|
65
74
|
## Copyright
|
66
75
|
|
67
76
|
Copyright (c) 2013 Andrew Cantino. See LICENSE for details.
|
77
|
+
|
78
|
+
Thanks to @BlackEdder for many contributions!
|
79
|
+
|
80
|
+
[Bitdeli Badge](https://bitdeli.com/free "Bitdeli Badge")
|
81
|
+
|
data/bin/reckon
CHANGED
data/lib/reckon.rb
CHANGED
@@ -14,4 +14,6 @@ require 'terminal-table'
|
|
14
14
|
|
15
15
|
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "app"))
|
16
16
|
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "ledger_parser"))
|
17
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "csv_parser"))
|
18
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "money"))
|
17
19
|
|
data/lib/reckon/app.rb
CHANGED
@@ -4,7 +4,7 @@ require 'pp'
|
|
4
4
|
module Reckon
|
5
5
|
class App
|
6
6
|
VERSION = "Reckon 0.1"
|
7
|
-
attr_accessor :options, :
|
7
|
+
attr_accessor :options, :accounts, :tokens, :seen, :csv_parser
|
8
8
|
|
9
9
|
def initialize(options = {})
|
10
10
|
self.options = options
|
@@ -12,29 +12,18 @@ module Reckon
|
|
12
12
|
self.accounts = {}
|
13
13
|
self.seen = {}
|
14
14
|
self.options[:currency] ||= '$'
|
15
|
+
options[:string] = File.read(options[:file]) unless options[:string]
|
16
|
+
@csv_parser = CSVParser.new( options )
|
15
17
|
learn!
|
16
|
-
parse
|
17
|
-
filter_csv
|
18
|
-
detect_columns
|
19
|
-
end
|
20
|
-
|
21
|
-
def filter_csv
|
22
|
-
if options[:ignore_columns]
|
23
|
-
new_columns = []
|
24
|
-
columns.each_with_index do |column, index|
|
25
|
-
new_columns << column unless options[:ignore_columns].include?(index + 1)
|
26
|
-
end
|
27
|
-
@columns = new_columns
|
28
|
-
end
|
29
18
|
end
|
30
19
|
|
31
20
|
def learn_from(ledger)
|
32
21
|
LedgerParser.new(ledger).entries.each do |entry|
|
33
22
|
entry[:accounts].each do |account|
|
34
23
|
learn_about_account( account[:name],
|
35
|
-
|
24
|
+
[entry[:desc], account[:amount]].join(" ") ) unless account[:name] == options[:bank_account]
|
36
25
|
seen[entry[:date]] ||= {}
|
37
|
-
seen[entry[:date]][pretty_money(account[:amount])] = true
|
26
|
+
seen[entry[:date]][@csv_parser.pretty_money(account[:amount])] = true
|
38
27
|
end
|
39
28
|
end
|
40
29
|
end
|
@@ -156,63 +145,6 @@ module Reckon
|
|
156
145
|
out
|
157
146
|
end
|
158
147
|
|
159
|
-
def money_for(index)
|
160
|
-
value = money_column_indices.inject("") { |m, i| m + columns[i][index] }
|
161
|
-
value = value.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
|
162
|
-
cleaned_value = value.gsub(/[^\d\.]/, '').to_f
|
163
|
-
cleaned_value *= -1 if value =~ /[\(\-]/
|
164
|
-
cleaned_value = -(cleaned_value) if options[:inverse]
|
165
|
-
cleaned_value
|
166
|
-
end
|
167
|
-
|
168
|
-
def pretty_money_for(index, negate = false)
|
169
|
-
pretty_money(money_for(index), negate)
|
170
|
-
end
|
171
|
-
|
172
|
-
def pretty_money(amount, negate = false)
|
173
|
-
currency = options[:currency]
|
174
|
-
if options[:suffixed]
|
175
|
-
(amount >= 0 ? " " : "") + sprintf("%0.2f #{currency}", amount * (negate ? -1 : 1))
|
176
|
-
else
|
177
|
-
(amount >= 0 ? " " : "") + sprintf("%0.2f", amount * (negate ? -1 : 1)).gsub(/^((\-)|)(?=\d)/, "\\1#{currency}")
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
def date_for(index)
|
182
|
-
value = columns[date_column_index][index]
|
183
|
-
if options[:date_format].nil?
|
184
|
-
value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
|
185
|
-
value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
|
186
|
-
value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
|
187
|
-
value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
|
188
|
-
else
|
189
|
-
begin
|
190
|
-
value = Date.strptime(value, options[:date_format])
|
191
|
-
rescue
|
192
|
-
puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
|
193
|
-
exit 1
|
194
|
-
end
|
195
|
-
end
|
196
|
-
begin
|
197
|
-
guess = Chronic.parse(value, :context => :past)
|
198
|
-
if guess.to_i < 953236800 && value =~ /\//
|
199
|
-
guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past)
|
200
|
-
end
|
201
|
-
guess
|
202
|
-
rescue
|
203
|
-
puts "I'm having trouble parsing #{value}, which I thought was a date. Please report this so that we"
|
204
|
-
puts "can make this parser better!"
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
def pretty_date_for(index)
|
209
|
-
date_for(index).strftime("%Y/%m/%d")
|
210
|
-
end
|
211
|
-
|
212
|
-
def description_for(index)
|
213
|
-
description_column_indices.map { |i| columns[i][index] }.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
|
214
|
-
end
|
215
|
-
|
216
148
|
def output_table
|
217
149
|
output = Terminal::Table.new do |t|
|
218
150
|
t.headings = 'Date', 'Amount', 'Description'
|
@@ -223,148 +155,21 @@ module Reckon
|
|
223
155
|
puts output
|
224
156
|
end
|
225
157
|
|
226
|
-
def evaluate_columns(cols)
|
227
|
-
results = []
|
228
|
-
found_likely_money_column = false
|
229
|
-
cols.each_with_index do |column, index|
|
230
|
-
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
231
|
-
last = nil
|
232
|
-
column.reverse.each_with_index do |entry, row_from_bottom|
|
233
|
-
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
234
|
-
entry = entry.strip
|
235
|
-
money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
|
236
|
-
money_score += 20 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
|
237
|
-
money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
|
238
|
-
money_score -= entry.length if entry.length > 8
|
239
|
-
money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
|
240
|
-
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
241
|
-
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
242
|
-
date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
|
243
|
-
date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
|
244
|
-
date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
|
245
|
-
date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
|
246
|
-
date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
|
247
|
-
date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
|
248
|
-
|
249
|
-
# Try to determine if this is a balance column
|
250
|
-
entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
|
251
|
-
if last && entry_as_num != 0 && last != 0
|
252
|
-
row.each do |row_entry|
|
253
|
-
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
254
|
-
if row_entry != 0 && last + row_entry == entry_as_num
|
255
|
-
money_score -= 10
|
256
|
-
break
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
last = entry_as_num
|
261
|
-
end
|
262
|
-
|
263
|
-
if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
|
264
|
-
money_score += 10 * column.length
|
265
|
-
found_likely_money_column = true
|
266
|
-
end
|
267
|
-
|
268
|
-
results << { :index => index, :money_score => money_score, :date_score => date_score }
|
269
|
-
end
|
270
|
-
|
271
|
-
return [results, found_likely_money_column]
|
272
|
-
end
|
273
|
-
|
274
|
-
def merge_columns(a, b)
|
275
|
-
output_columns = []
|
276
|
-
columns.each_with_index do |column, index|
|
277
|
-
if index == a
|
278
|
-
new_column = []
|
279
|
-
column.each_with_index do |row, row_index|
|
280
|
-
new_column << row + " " + (columns[b][row_index] || '')
|
281
|
-
end
|
282
|
-
output_columns << new_column
|
283
|
-
elsif index == b
|
284
|
-
# skip
|
285
|
-
else
|
286
|
-
output_columns << column
|
287
|
-
end
|
288
|
-
end
|
289
|
-
output_columns
|
290
|
-
end
|
291
|
-
|
292
|
-
def detect_columns
|
293
|
-
results, found_likely_money_column = evaluate_columns(columns)
|
294
|
-
self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
|
295
|
-
|
296
|
-
if !found_likely_money_column
|
297
|
-
found_likely_double_money_columns = false
|
298
|
-
0.upto(columns.length - 2) do |i|
|
299
|
-
_, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))
|
300
|
-
|
301
|
-
if found_likely_double_money_columns
|
302
|
-
self.money_column_indices = [ i, i+1 ]
|
303
|
-
unless settings[:testing]
|
304
|
-
puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
|
305
|
-
puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
|
306
|
-
puts "please report this issue to us so we can take a look!\n"
|
307
|
-
end
|
308
|
-
break
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
if !found_likely_double_money_columns && !settings[:testing]
|
313
|
-
puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
|
314
|
-
end
|
315
|
-
end
|
316
|
-
|
317
|
-
results.reject! {|i| money_column_indices.include?(i[:index]) }
|
318
|
-
self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
|
319
|
-
results.reject! {|i| i[:index] == date_column_index }
|
320
|
-
|
321
|
-
self.description_column_indices = results.map { |i| i[:index] }
|
322
|
-
end
|
323
|
-
|
324
158
|
def each_row_backwards
|
325
159
|
rows = []
|
326
|
-
(0
|
327
|
-
rows << { :date => date_for(index),
|
328
|
-
|
329
|
-
|
160
|
+
(0...@csv_parser.columns.first.length).to_a.each do |index|
|
161
|
+
rows << { :date => @csv_parser.date_for(index),
|
162
|
+
:pretty_date => @csv_parser.pretty_date_for(index),
|
163
|
+
:pretty_money => @csv_parser.pretty_money_for(index),
|
164
|
+
:pretty_money_negated => @csv_parser.pretty_money_for(index, :negate),
|
165
|
+
:money => @csv_parser.money_for(index),
|
166
|
+
:description => @csv_parser.description_for(index) }
|
330
167
|
end
|
331
168
|
rows.sort { |a, b| a[:date] <=> b[:date] }.each do |row|
|
332
169
|
yield row
|
333
170
|
end
|
334
171
|
end
|
335
172
|
|
336
|
-
def columns
|
337
|
-
@columns ||= begin
|
338
|
-
last_row_length = nil
|
339
|
-
csv_data.inject([]) do |memo, row|
|
340
|
-
# fail "Input CSV must have consistent row lengths." if last_row_length && row.length != last_row_length
|
341
|
-
unless row.all? { |i| i.nil? || i.length == 0 }
|
342
|
-
row.each_with_index do |entry, index|
|
343
|
-
memo[index] ||= []
|
344
|
-
memo[index] << (entry || '').strip
|
345
|
-
end
|
346
|
-
last_row_length = row.length
|
347
|
-
end
|
348
|
-
memo
|
349
|
-
end
|
350
|
-
end
|
351
|
-
end
|
352
|
-
|
353
|
-
def parse
|
354
|
-
data = options[:string] || File.read(options[:file])
|
355
|
-
|
356
|
-
if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
|
357
|
-
data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
|
358
|
-
csv_engine = CSV
|
359
|
-
else
|
360
|
-
csv_engine = FasterCSV
|
361
|
-
end
|
362
|
-
|
363
|
-
@csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
|
364
|
-
csv_data.shift if options[:contains_header]
|
365
|
-
csv_data
|
366
|
-
end
|
367
|
-
|
368
173
|
def self.parse_opts(args = ARGV)
|
369
174
|
options = { :output_file => STDOUT }
|
370
175
|
parser = OptionParser.new do |opts|
|
@@ -403,8 +208,9 @@ module Reckon
|
|
403
208
|
options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
|
404
209
|
end
|
405
210
|
|
406
|
-
opts.on("", "--contains-header", "The first row of the CSV is a header and should be skipped") do |contains_header|
|
407
|
-
options[:contains_header] =
|
211
|
+
opts.on("", "--contains-header [N]", "The first row of the CSV is a header and should be skipped. Optionally add the number of rows to skip.") do |contains_header|
|
212
|
+
options[:contains_header] = 1
|
213
|
+
options[:contains_header] = contains_header.to_i if contains_header
|
408
214
|
end
|
409
215
|
|
410
216
|
opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
|
@@ -462,15 +268,5 @@ module Reckon
|
|
462
268
|
|
463
269
|
options
|
464
270
|
end
|
465
|
-
|
466
|
-
@settings = { :testing => false }
|
467
|
-
|
468
|
-
def self.settings
|
469
|
-
@settings
|
470
|
-
end
|
471
|
-
|
472
|
-
def settings
|
473
|
-
self.class.settings
|
474
|
-
end
|
475
271
|
end
|
476
272
|
end
|
@@ -0,0 +1,259 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
module Reckon
|
5
|
+
class CSVParser
|
6
|
+
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index, :description_column_indices, :money_column, :date_column
|
7
|
+
|
8
|
+
def initialize(options = {})
|
9
|
+
self.options = options
|
10
|
+
self.options[:currency] ||= '$'
|
11
|
+
parse
|
12
|
+
filter_csv
|
13
|
+
detect_columns
|
14
|
+
end
|
15
|
+
|
16
|
+
def filter_csv
|
17
|
+
if options[:ignore_columns]
|
18
|
+
new_columns = []
|
19
|
+
columns.each_with_index do |column, index|
|
20
|
+
new_columns << column unless options[:ignore_columns].include?(index + 1)
|
21
|
+
end
|
22
|
+
@columns = new_columns
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def money_for(index)
|
27
|
+
@money_column[index]
|
28
|
+
end
|
29
|
+
|
30
|
+
def pretty_money_for(index, negate = false)
|
31
|
+
money_for( index ).pretty( negate )
|
32
|
+
end
|
33
|
+
|
34
|
+
def pretty_money(amount, negate = false)
|
35
|
+
Money.new( amount, @options ).pretty( negate )
|
36
|
+
end
|
37
|
+
|
38
|
+
def date_for(index)
|
39
|
+
@date_column.for( index )
|
40
|
+
end
|
41
|
+
|
42
|
+
def pretty_date_for(index)
|
43
|
+
@date_column.pretty_for( index )
|
44
|
+
end
|
45
|
+
|
46
|
+
def description_for(index)
|
47
|
+
description_column_indices.map { |i| columns[i][index] }.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
|
48
|
+
end
|
49
|
+
|
50
|
+
def evaluate_columns(cols)
|
51
|
+
results = []
|
52
|
+
found_likely_money_column = false
|
53
|
+
cols.each_with_index do |column, index|
|
54
|
+
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
55
|
+
last = nil
|
56
|
+
column.reverse.each_with_index do |entry, row_from_bottom|
|
57
|
+
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
58
|
+
entry = entry.strip
|
59
|
+
money_score += Money::likelihood( entry )
|
60
|
+
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
61
|
+
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
62
|
+
date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
|
63
|
+
date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
|
64
|
+
date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
|
65
|
+
date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
|
66
|
+
date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
|
67
|
+
date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
|
68
|
+
|
69
|
+
# Try to determine if this is a balance column
|
70
|
+
entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
|
71
|
+
if last && entry_as_num != 0 && last != 0
|
72
|
+
row.each do |row_entry|
|
73
|
+
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
74
|
+
if row_entry != 0 && last + row_entry == entry_as_num
|
75
|
+
money_score -= 10
|
76
|
+
break
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
last = entry_as_num
|
81
|
+
end
|
82
|
+
|
83
|
+
if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
|
84
|
+
money_score += 10 * column.length
|
85
|
+
found_likely_money_column = true
|
86
|
+
end
|
87
|
+
|
88
|
+
results << { :index => index, :money_score => money_score, :date_score => date_score }
|
89
|
+
end
|
90
|
+
|
91
|
+
return [results, found_likely_money_column]
|
92
|
+
end
|
93
|
+
|
94
|
+
def merge_columns(a, b)
|
95
|
+
output_columns = []
|
96
|
+
columns.each_with_index do |column, index|
|
97
|
+
if index == a
|
98
|
+
new_column = []
|
99
|
+
column.each_with_index do |row, row_index|
|
100
|
+
new_column << row + " " + (columns[b][row_index] || '')
|
101
|
+
end
|
102
|
+
output_columns << new_column
|
103
|
+
elsif index == b
|
104
|
+
# skip
|
105
|
+
else
|
106
|
+
output_columns << column
|
107
|
+
end
|
108
|
+
end
|
109
|
+
output_columns
|
110
|
+
end
|
111
|
+
|
112
|
+
def evaluate_two_money_columns( columns, id1, id2, unmerged_results )
|
113
|
+
merged_columns = merge_columns( id1, id2 )
|
114
|
+
results, found_likely_money_column = evaluate_columns( merged_columns )
|
115
|
+
if !found_likely_money_column
|
116
|
+
new_res = results.find { |el| el[:index] == id1 }
|
117
|
+
old_res1 = unmerged_results.find { |el| el[:index] == id1 }
|
118
|
+
old_res2 = unmerged_results.find { |el| el[:index] == id2 }
|
119
|
+
if new_res[:money_score] > old_res1[:money_score] &&
|
120
|
+
new_res[:money_score] > old_res2[:money_score]
|
121
|
+
found_likely_money_column = true
|
122
|
+
end
|
123
|
+
end
|
124
|
+
[results, found_likely_money_column]
|
125
|
+
end
|
126
|
+
|
127
|
+
def found_double_money_column( id1, id2 )
|
128
|
+
self.money_column_indices = [ id1, id2 ]
|
129
|
+
unless settings[:testing]
|
130
|
+
puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
|
131
|
+
puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
|
132
|
+
puts "please report this issue to us so we can take a look!\n"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
# In some CSV files, whether an amount is negative or positive is indicated in a separate column
|
137
|
+
def detect_sign_column
|
138
|
+
return if columns[0].length <= 2 # This test needs requires more than two rows otherwise will lead to false positives
|
139
|
+
signs = []
|
140
|
+
if @money_column_indices[0] > 0
|
141
|
+
column = columns[ @money_column_indices[0] - 1 ]
|
142
|
+
signs = column.uniq
|
143
|
+
end
|
144
|
+
if (signs.length != 2 &&
|
145
|
+
(@money_column_indices[0] + 1 < columns.length))
|
146
|
+
column = columns[ @money_column_indices[0] + 1 ]
|
147
|
+
signs = column.uniq
|
148
|
+
end
|
149
|
+
if signs.length == 2
|
150
|
+
negative_first = true
|
151
|
+
negative_first = false if signs[0] == "Bij" || signs[0].downcase =~ /^cr/ # look for known debit indicators
|
152
|
+
@money_column.each_with_index do |money, i|
|
153
|
+
if negative_first && column[i] == signs[0]
|
154
|
+
@money_column[i] = -money
|
155
|
+
elsif !negative_first && column[i] == signs[1]
|
156
|
+
@money_column[i] = -money
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
def detect_columns
|
163
|
+
results, found_likely_money_column = evaluate_columns(columns)
|
164
|
+
self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
|
165
|
+
|
166
|
+
if !found_likely_money_column
|
167
|
+
found_likely_double_money_columns = false
|
168
|
+
0.upto(columns.length - 2) do |i|
|
169
|
+
if MoneyColumn.new( columns[i] ).merge!( MoneyColumn.new( columns[i+1] ) )
|
170
|
+
_, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))
|
171
|
+
if found_likely_double_money_columns
|
172
|
+
found_double_money_column( i, i + 1 )
|
173
|
+
break
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
if !found_likely_double_money_columns
|
179
|
+
0.upto(columns.length - 2) do |i|
|
180
|
+
if MoneyColumn.new( columns[i] ).merge!( MoneyColumn.new( columns[i+1] ) )
|
181
|
+
# Try a more specific test
|
182
|
+
_, found_likely_double_money_columns = evaluate_two_money_columns( columns, i, i+1, results )
|
183
|
+
if found_likely_double_money_columns
|
184
|
+
found_double_money_column( i, i + 1 )
|
185
|
+
break
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
if !found_likely_double_money_columns && !settings[:testing]
|
192
|
+
puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
results.reject! {|i| money_column_indices.include?(i[:index]) }
|
197
|
+
self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
|
198
|
+
results.reject! {|i| i[:index] == date_column_index }
|
199
|
+
@date_column = DateColumn.new( columns[ self.date_column_index ], @options )
|
200
|
+
|
201
|
+
if ( money_column_indices.length == 1 )
|
202
|
+
@money_column = MoneyColumn.new( columns[money_column_indices[0]],
|
203
|
+
@options )
|
204
|
+
detect_sign_column if @money_column.positive?
|
205
|
+
else
|
206
|
+
@money_column = MoneyColumn.new( columns[money_column_indices[0]],
|
207
|
+
@options )
|
208
|
+
@money_column.merge!(
|
209
|
+
MoneyColumn.new( columns[money_column_indices[1]], @options ) )
|
210
|
+
end
|
211
|
+
|
212
|
+
self.description_column_indices = results.map { |i| i[:index] }
|
213
|
+
end
|
214
|
+
|
215
|
+
def columns
|
216
|
+
@columns ||= begin
|
217
|
+
last_row_length = nil
|
218
|
+
csv_data.inject([]) do |memo, row|
|
219
|
+
# fail "Input CSV must have consistent row lengths." if last_row_length && row.length != last_row_length
|
220
|
+
unless row.all? { |i| i.nil? || i.length == 0 }
|
221
|
+
row.each_with_index do |entry, index|
|
222
|
+
memo[index] ||= []
|
223
|
+
memo[index] << (entry || '').strip
|
224
|
+
end
|
225
|
+
last_row_length = row.length
|
226
|
+
end
|
227
|
+
memo
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
def parse
|
233
|
+
data = options[:string] || File.read(options[:file])
|
234
|
+
|
235
|
+
if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
|
236
|
+
data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
|
237
|
+
csv_engine = CSV
|
238
|
+
else
|
239
|
+
csv_engine = FasterCSV
|
240
|
+
end
|
241
|
+
|
242
|
+
@csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
|
243
|
+
if options[:contains_header]
|
244
|
+
options[:contains_header].times { csv_data.shift }
|
245
|
+
end
|
246
|
+
csv_data
|
247
|
+
end
|
248
|
+
|
249
|
+
@settings = { :testing => false }
|
250
|
+
|
251
|
+
def self.settings
|
252
|
+
@settings
|
253
|
+
end
|
254
|
+
|
255
|
+
def settings
|
256
|
+
self.class.settings
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|