reckon 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +1 -1
- data/README.md +31 -17
- data/bin/reckon +1 -1
- data/lib/reckon.rb +2 -0
- data/lib/reckon/app.rb +15 -219
- data/lib/reckon/csv_parser.rb +259 -0
- data/lib/reckon/money.rb +150 -0
- data/reckon.gemspec +2 -2
- data/spec/reckon/app_spec.rb +14 -270
- data/spec/reckon/csv_parser_spec.rb +393 -0
- data/spec/reckon/date_column_spec.rb +39 -0
- data/spec/reckon/money_column_spec.rb +52 -0
- data/spec/reckon/money_spec.rb +68 -0
- metadata +28 -35
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d685aa1ab6e16d39b1fbe75ff281f3f439d347c8
|
4
|
+
data.tar.gz: 0384cd69b6cc8d6a81a4530954ece1c16d2516f5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 27f48bc47502d7dba8aa5ef5cc47ba149a679a1340a192006c8707d3492bc1e0dcb35a9e85a375b9525b8d5f72e2476b0ffcf3b555fa72a6db7de744d2108061
|
7
|
+
data.tar.gz: e5224c1a2bc89492f3d3d299edf4f0d9d1d17e4df3dfbd5629dc039d4325275730dbeaf1f04e11b14e05b679e4bc3ea975643d45eb5af69d6220b2e3055759b6
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-
|
1
|
+
ruby-2.0.0-p353
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -32,23 +32,32 @@ Learn more:
|
|
32
32
|
|
33
33
|
> reckon -h
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
35
|
+
Usage: Reckon.rb [options]
|
36
|
+
|
37
|
+
-f, --file FILE The CSV file to parse
|
38
|
+
-a, --account name The Ledger Account this file is for
|
39
|
+
-v, --[no-]verbose Run verbosely
|
40
|
+
-i, --inverse Use the negative of each amount
|
41
|
+
-p, --print-table Print out the parsed CSV in table form
|
42
|
+
-o, --output-file FILE The ledger file to append to
|
43
|
+
-l, --learn-from FILE An existing ledger file to learn accounts from
|
44
|
+
--ignore-columns 1,2,5
|
45
|
+
Columns to ignore in the CSV file - the first column is column 1
|
46
|
+
--contains-header
|
47
|
+
The first row of the CSV is a header and should be skipped
|
48
|
+
--csv-separator ','
|
49
|
+
Separator for parsing the CSV - default is comma.
|
50
|
+
--comma-separates-cents
|
51
|
+
Use comma instead of period to delimit dollars from cents when parsing ($100,50 instead of $100.50)
|
52
|
+
--encoding
|
53
|
+
Specify an encoding for the CSV file
|
54
|
+
-c, --currency '$' Currency symbol to use, defaults to $ (£, EUR)
|
55
|
+
--date-format '%d/%m/%Y'
|
56
|
+
Force the date format (see Ruby DateTime strftime)
|
57
|
+
--suffixed
|
58
|
+
If --currency should be used as a suffix. Defaults to false.
|
59
|
+
-h, --help Show this message
|
60
|
+
--version Show version
|
52
61
|
|
53
62
|
If you find CSV files that it can't parse, send me examples or pull requests!
|
54
63
|
|
@@ -65,3 +74,8 @@ If you find CSV files that it can't parse, send me examples or pull requests!
|
|
65
74
|
## Copyright
|
66
75
|
|
67
76
|
Copyright (c) 2013 Andrew Cantino. See LICENSE for details.
|
77
|
+
|
78
|
+
Thanks to @BlackEdder for many contributions!
|
79
|
+
|
80
|
+
[![Bitdeli Badge](https://d2weczhvl823v0.cloudfront.net/cantino/reckon/trend.png)](https://bitdeli.com/free "Bitdeli Badge")
|
81
|
+
|
data/bin/reckon
CHANGED
data/lib/reckon.rb
CHANGED
@@ -14,4 +14,6 @@ require 'terminal-table'
|
|
14
14
|
|
15
15
|
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "app"))
|
16
16
|
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "ledger_parser"))
|
17
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "csv_parser"))
|
18
|
+
require File.expand_path(File.join(File.dirname(__FILE__), "reckon", "money"))
|
17
19
|
|
data/lib/reckon/app.rb
CHANGED
@@ -4,7 +4,7 @@ require 'pp'
|
|
4
4
|
module Reckon
|
5
5
|
class App
|
6
6
|
VERSION = "Reckon 0.1"
|
7
|
-
attr_accessor :options, :
|
7
|
+
attr_accessor :options, :accounts, :tokens, :seen, :csv_parser
|
8
8
|
|
9
9
|
def initialize(options = {})
|
10
10
|
self.options = options
|
@@ -12,29 +12,18 @@ module Reckon
|
|
12
12
|
self.accounts = {}
|
13
13
|
self.seen = {}
|
14
14
|
self.options[:currency] ||= '$'
|
15
|
+
options[:string] = File.read(options[:file]) unless options[:string]
|
16
|
+
@csv_parser = CSVParser.new( options )
|
15
17
|
learn!
|
16
|
-
parse
|
17
|
-
filter_csv
|
18
|
-
detect_columns
|
19
|
-
end
|
20
|
-
|
21
|
-
def filter_csv
|
22
|
-
if options[:ignore_columns]
|
23
|
-
new_columns = []
|
24
|
-
columns.each_with_index do |column, index|
|
25
|
-
new_columns << column unless options[:ignore_columns].include?(index + 1)
|
26
|
-
end
|
27
|
-
@columns = new_columns
|
28
|
-
end
|
29
18
|
end
|
30
19
|
|
31
20
|
def learn_from(ledger)
|
32
21
|
LedgerParser.new(ledger).entries.each do |entry|
|
33
22
|
entry[:accounts].each do |account|
|
34
23
|
learn_about_account( account[:name],
|
35
|
-
|
24
|
+
[entry[:desc], account[:amount]].join(" ") ) unless account[:name] == options[:bank_account]
|
36
25
|
seen[entry[:date]] ||= {}
|
37
|
-
seen[entry[:date]][pretty_money(account[:amount])] = true
|
26
|
+
seen[entry[:date]][@csv_parser.pretty_money(account[:amount])] = true
|
38
27
|
end
|
39
28
|
end
|
40
29
|
end
|
@@ -156,63 +145,6 @@ module Reckon
|
|
156
145
|
out
|
157
146
|
end
|
158
147
|
|
159
|
-
def money_for(index)
|
160
|
-
value = money_column_indices.inject("") { |m, i| m + columns[i][index] }
|
161
|
-
value = value.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
|
162
|
-
cleaned_value = value.gsub(/[^\d\.]/, '').to_f
|
163
|
-
cleaned_value *= -1 if value =~ /[\(\-]/
|
164
|
-
cleaned_value = -(cleaned_value) if options[:inverse]
|
165
|
-
cleaned_value
|
166
|
-
end
|
167
|
-
|
168
|
-
def pretty_money_for(index, negate = false)
|
169
|
-
pretty_money(money_for(index), negate)
|
170
|
-
end
|
171
|
-
|
172
|
-
def pretty_money(amount, negate = false)
|
173
|
-
currency = options[:currency]
|
174
|
-
if options[:suffixed]
|
175
|
-
(amount >= 0 ? " " : "") + sprintf("%0.2f #{currency}", amount * (negate ? -1 : 1))
|
176
|
-
else
|
177
|
-
(amount >= 0 ? " " : "") + sprintf("%0.2f", amount * (negate ? -1 : 1)).gsub(/^((\-)|)(?=\d)/, "\\1#{currency}")
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
def date_for(index)
|
182
|
-
value = columns[date_column_index][index]
|
183
|
-
if options[:date_format].nil?
|
184
|
-
value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
|
185
|
-
value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
|
186
|
-
value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
|
187
|
-
value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
|
188
|
-
else
|
189
|
-
begin
|
190
|
-
value = Date.strptime(value, options[:date_format])
|
191
|
-
rescue
|
192
|
-
puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
|
193
|
-
exit 1
|
194
|
-
end
|
195
|
-
end
|
196
|
-
begin
|
197
|
-
guess = Chronic.parse(value, :context => :past)
|
198
|
-
if guess.to_i < 953236800 && value =~ /\//
|
199
|
-
guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past)
|
200
|
-
end
|
201
|
-
guess
|
202
|
-
rescue
|
203
|
-
puts "I'm having trouble parsing #{value}, which I thought was a date. Please report this so that we"
|
204
|
-
puts "can make this parser better!"
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
def pretty_date_for(index)
|
209
|
-
date_for(index).strftime("%Y/%m/%d")
|
210
|
-
end
|
211
|
-
|
212
|
-
def description_for(index)
|
213
|
-
description_column_indices.map { |i| columns[i][index] }.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
|
214
|
-
end
|
215
|
-
|
216
148
|
def output_table
|
217
149
|
output = Terminal::Table.new do |t|
|
218
150
|
t.headings = 'Date', 'Amount', 'Description'
|
@@ -223,148 +155,21 @@ module Reckon
|
|
223
155
|
puts output
|
224
156
|
end
|
225
157
|
|
226
|
-
def evaluate_columns(cols)
|
227
|
-
results = []
|
228
|
-
found_likely_money_column = false
|
229
|
-
cols.each_with_index do |column, index|
|
230
|
-
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
231
|
-
last = nil
|
232
|
-
column.reverse.each_with_index do |entry, row_from_bottom|
|
233
|
-
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
234
|
-
entry = entry.strip
|
235
|
-
money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
|
236
|
-
money_score += 20 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
|
237
|
-
money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
|
238
|
-
money_score -= entry.length if entry.length > 8
|
239
|
-
money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
|
240
|
-
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
241
|
-
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
242
|
-
date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
|
243
|
-
date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
|
244
|
-
date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
|
245
|
-
date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
|
246
|
-
date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
|
247
|
-
date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
|
248
|
-
|
249
|
-
# Try to determine if this is a balance column
|
250
|
-
entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
|
251
|
-
if last && entry_as_num != 0 && last != 0
|
252
|
-
row.each do |row_entry|
|
253
|
-
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
254
|
-
if row_entry != 0 && last + row_entry == entry_as_num
|
255
|
-
money_score -= 10
|
256
|
-
break
|
257
|
-
end
|
258
|
-
end
|
259
|
-
end
|
260
|
-
last = entry_as_num
|
261
|
-
end
|
262
|
-
|
263
|
-
if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
|
264
|
-
money_score += 10 * column.length
|
265
|
-
found_likely_money_column = true
|
266
|
-
end
|
267
|
-
|
268
|
-
results << { :index => index, :money_score => money_score, :date_score => date_score }
|
269
|
-
end
|
270
|
-
|
271
|
-
return [results, found_likely_money_column]
|
272
|
-
end
|
273
|
-
|
274
|
-
def merge_columns(a, b)
|
275
|
-
output_columns = []
|
276
|
-
columns.each_with_index do |column, index|
|
277
|
-
if index == a
|
278
|
-
new_column = []
|
279
|
-
column.each_with_index do |row, row_index|
|
280
|
-
new_column << row + " " + (columns[b][row_index] || '')
|
281
|
-
end
|
282
|
-
output_columns << new_column
|
283
|
-
elsif index == b
|
284
|
-
# skip
|
285
|
-
else
|
286
|
-
output_columns << column
|
287
|
-
end
|
288
|
-
end
|
289
|
-
output_columns
|
290
|
-
end
|
291
|
-
|
292
|
-
def detect_columns
|
293
|
-
results, found_likely_money_column = evaluate_columns(columns)
|
294
|
-
self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
|
295
|
-
|
296
|
-
if !found_likely_money_column
|
297
|
-
found_likely_double_money_columns = false
|
298
|
-
0.upto(columns.length - 2) do |i|
|
299
|
-
_, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))
|
300
|
-
|
301
|
-
if found_likely_double_money_columns
|
302
|
-
self.money_column_indices = [ i, i+1 ]
|
303
|
-
unless settings[:testing]
|
304
|
-
puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
|
305
|
-
puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
|
306
|
-
puts "please report this issue to us so we can take a look!\n"
|
307
|
-
end
|
308
|
-
break
|
309
|
-
end
|
310
|
-
end
|
311
|
-
|
312
|
-
if !found_likely_double_money_columns && !settings[:testing]
|
313
|
-
puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
|
314
|
-
end
|
315
|
-
end
|
316
|
-
|
317
|
-
results.reject! {|i| money_column_indices.include?(i[:index]) }
|
318
|
-
self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
|
319
|
-
results.reject! {|i| i[:index] == date_column_index }
|
320
|
-
|
321
|
-
self.description_column_indices = results.map { |i| i[:index] }
|
322
|
-
end
|
323
|
-
|
324
158
|
def each_row_backwards
|
325
159
|
rows = []
|
326
|
-
(0
|
327
|
-
rows << { :date => date_for(index),
|
328
|
-
|
329
|
-
|
160
|
+
(0...@csv_parser.columns.first.length).to_a.each do |index|
|
161
|
+
rows << { :date => @csv_parser.date_for(index),
|
162
|
+
:pretty_date => @csv_parser.pretty_date_for(index),
|
163
|
+
:pretty_money => @csv_parser.pretty_money_for(index),
|
164
|
+
:pretty_money_negated => @csv_parser.pretty_money_for(index, :negate),
|
165
|
+
:money => @csv_parser.money_for(index),
|
166
|
+
:description => @csv_parser.description_for(index) }
|
330
167
|
end
|
331
168
|
rows.sort { |a, b| a[:date] <=> b[:date] }.each do |row|
|
332
169
|
yield row
|
333
170
|
end
|
334
171
|
end
|
335
172
|
|
336
|
-
def columns
|
337
|
-
@columns ||= begin
|
338
|
-
last_row_length = nil
|
339
|
-
csv_data.inject([]) do |memo, row|
|
340
|
-
# fail "Input CSV must have consistent row lengths." if last_row_length && row.length != last_row_length
|
341
|
-
unless row.all? { |i| i.nil? || i.length == 0 }
|
342
|
-
row.each_with_index do |entry, index|
|
343
|
-
memo[index] ||= []
|
344
|
-
memo[index] << (entry || '').strip
|
345
|
-
end
|
346
|
-
last_row_length = row.length
|
347
|
-
end
|
348
|
-
memo
|
349
|
-
end
|
350
|
-
end
|
351
|
-
end
|
352
|
-
|
353
|
-
def parse
|
354
|
-
data = options[:string] || File.read(options[:file])
|
355
|
-
|
356
|
-
if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
|
357
|
-
data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
|
358
|
-
csv_engine = CSV
|
359
|
-
else
|
360
|
-
csv_engine = FasterCSV
|
361
|
-
end
|
362
|
-
|
363
|
-
@csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
|
364
|
-
csv_data.shift if options[:contains_header]
|
365
|
-
csv_data
|
366
|
-
end
|
367
|
-
|
368
173
|
def self.parse_opts(args = ARGV)
|
369
174
|
options = { :output_file => STDOUT }
|
370
175
|
parser = OptionParser.new do |opts|
|
@@ -403,8 +208,9 @@ module Reckon
|
|
403
208
|
options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
|
404
209
|
end
|
405
210
|
|
406
|
-
opts.on("", "--contains-header", "The first row of the CSV is a header and should be skipped") do |contains_header|
|
407
|
-
options[:contains_header] =
|
211
|
+
opts.on("", "--contains-header [N]", "The first row of the CSV is a header and should be skipped. Optionally add the number of rows to skip.") do |contains_header|
|
212
|
+
options[:contains_header] = 1
|
213
|
+
options[:contains_header] = contains_header.to_i if contains_header
|
408
214
|
end
|
409
215
|
|
410
216
|
opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
|
@@ -462,15 +268,5 @@ module Reckon
|
|
462
268
|
|
463
269
|
options
|
464
270
|
end
|
465
|
-
|
466
|
-
@settings = { :testing => false }
|
467
|
-
|
468
|
-
def self.settings
|
469
|
-
@settings
|
470
|
-
end
|
471
|
-
|
472
|
-
def settings
|
473
|
-
self.class.settings
|
474
|
-
end
|
475
271
|
end
|
476
272
|
end
|
@@ -0,0 +1,259 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
require 'pp'
|
3
|
+
|
4
|
+
module Reckon
|
5
|
+
class CSVParser
|
6
|
+
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index, :description_column_indices, :money_column, :date_column
|
7
|
+
|
8
|
+
def initialize(options = {})
|
9
|
+
self.options = options
|
10
|
+
self.options[:currency] ||= '$'
|
11
|
+
parse
|
12
|
+
filter_csv
|
13
|
+
detect_columns
|
14
|
+
end
|
15
|
+
|
16
|
+
def filter_csv
|
17
|
+
if options[:ignore_columns]
|
18
|
+
new_columns = []
|
19
|
+
columns.each_with_index do |column, index|
|
20
|
+
new_columns << column unless options[:ignore_columns].include?(index + 1)
|
21
|
+
end
|
22
|
+
@columns = new_columns
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def money_for(index)
|
27
|
+
@money_column[index]
|
28
|
+
end
|
29
|
+
|
30
|
+
def pretty_money_for(index, negate = false)
|
31
|
+
money_for( index ).pretty( negate )
|
32
|
+
end
|
33
|
+
|
34
|
+
def pretty_money(amount, negate = false)
|
35
|
+
Money.new( amount, @options ).pretty( negate )
|
36
|
+
end
|
37
|
+
|
38
|
+
def date_for(index)
|
39
|
+
@date_column.for( index )
|
40
|
+
end
|
41
|
+
|
42
|
+
def pretty_date_for(index)
|
43
|
+
@date_column.pretty_for( index )
|
44
|
+
end
|
45
|
+
|
46
|
+
def description_for(index)
|
47
|
+
description_column_indices.map { |i| columns[i][index] }.join("; ").squeeze(" ").gsub(/(;\s+){2,}/, '').strip
|
48
|
+
end
|
49
|
+
|
50
|
+
def evaluate_columns(cols)
|
51
|
+
results = []
|
52
|
+
found_likely_money_column = false
|
53
|
+
cols.each_with_index do |column, index|
|
54
|
+
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
55
|
+
last = nil
|
56
|
+
column.reverse.each_with_index do |entry, row_from_bottom|
|
57
|
+
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
58
|
+
entry = entry.strip
|
59
|
+
money_score += Money::likelihood( entry )
|
60
|
+
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
61
|
+
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
62
|
+
date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
|
63
|
+
date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
|
64
|
+
date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
|
65
|
+
date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
|
66
|
+
date_score += 30 if entry =~ /^\d+[:\/\.]\d+[:\/\.]\d+([ :]\d+[:\/\.]\d+)?$/
|
67
|
+
date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
|
68
|
+
|
69
|
+
# Try to determine if this is a balance column
|
70
|
+
entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
|
71
|
+
if last && entry_as_num != 0 && last != 0
|
72
|
+
row.each do |row_entry|
|
73
|
+
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
74
|
+
if row_entry != 0 && last + row_entry == entry_as_num
|
75
|
+
money_score -= 10
|
76
|
+
break
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
last = entry_as_num
|
81
|
+
end
|
82
|
+
|
83
|
+
if possible_neg_money_count > (column.length / 5.0) && possible_pos_money_count > (column.length / 5.0)
|
84
|
+
money_score += 10 * column.length
|
85
|
+
found_likely_money_column = true
|
86
|
+
end
|
87
|
+
|
88
|
+
results << { :index => index, :money_score => money_score, :date_score => date_score }
|
89
|
+
end
|
90
|
+
|
91
|
+
return [results, found_likely_money_column]
|
92
|
+
end
|
93
|
+
|
94
|
+
def merge_columns(a, b)
|
95
|
+
output_columns = []
|
96
|
+
columns.each_with_index do |column, index|
|
97
|
+
if index == a
|
98
|
+
new_column = []
|
99
|
+
column.each_with_index do |row, row_index|
|
100
|
+
new_column << row + " " + (columns[b][row_index] || '')
|
101
|
+
end
|
102
|
+
output_columns << new_column
|
103
|
+
elsif index == b
|
104
|
+
# skip
|
105
|
+
else
|
106
|
+
output_columns << column
|
107
|
+
end
|
108
|
+
end
|
109
|
+
output_columns
|
110
|
+
end
|
111
|
+
|
112
|
+
def evaluate_two_money_columns( columns, id1, id2, unmerged_results )
|
113
|
+
merged_columns = merge_columns( id1, id2 )
|
114
|
+
results, found_likely_money_column = evaluate_columns( merged_columns )
|
115
|
+
if !found_likely_money_column
|
116
|
+
new_res = results.find { |el| el[:index] == id1 }
|
117
|
+
old_res1 = unmerged_results.find { |el| el[:index] == id1 }
|
118
|
+
old_res2 = unmerged_results.find { |el| el[:index] == id2 }
|
119
|
+
if new_res[:money_score] > old_res1[:money_score] &&
|
120
|
+
new_res[:money_score] > old_res2[:money_score]
|
121
|
+
found_likely_money_column = true
|
122
|
+
end
|
123
|
+
end
|
124
|
+
[results, found_likely_money_column]
|
125
|
+
end
|
126
|
+
|
127
|
+
def found_double_money_column( id1, id2 )
|
128
|
+
self.money_column_indices = [ id1, id2 ]
|
129
|
+
unless settings[:testing]
|
130
|
+
puts "It looks like this CSV has two seperate columns for money, one of which shows positive"
|
131
|
+
puts "changes and one of which shows negative changes. If this is true, great. Otherwise,"
|
132
|
+
puts "please report this issue to us so we can take a look!\n"
|
133
|
+
end
|
134
|
+
end
|
135
|
+
|
136
|
+
# In some CSV files the sign (negative/positive) of each amount is indicated in a separate, adjacent column
|
137
|
+
def detect_sign_column
|
138
|
+
return if columns[0].length <= 2 # This test requires more than two rows; otherwise it will lead to false positives
|
139
|
+
signs = []
|
140
|
+
if @money_column_indices[0] > 0
|
141
|
+
column = columns[ @money_column_indices[0] - 1 ]
|
142
|
+
signs = column.uniq
|
143
|
+
end
|
144
|
+
if (signs.length != 2 &&
|
145
|
+
(@money_column_indices[0] + 1 < columns.length))
|
146
|
+
column = columns[ @money_column_indices[0] + 1 ]
|
147
|
+
signs = column.uniq
|
148
|
+
end
|
149
|
+
if signs.length == 2
|
150
|
+
negative_first = true
|
151
|
+
negative_first = false if signs[0] == "Bij" || signs[0].downcase =~ /^cr/ # look for known credit indicators (first sign is then the positive one)
|
152
|
+
@money_column.each_with_index do |money, i|
|
153
|
+
if negative_first && column[i] == signs[0]
|
154
|
+
@money_column[i] = -money
|
155
|
+
elsif !negative_first && column[i] == signs[1]
|
156
|
+
@money_column[i] = -money
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
def detect_columns
|
163
|
+
results, found_likely_money_column = evaluate_columns(columns)
|
164
|
+
self.money_column_indices = [ results.sort { |a, b| b[:money_score] <=> a[:money_score] }.first[:index] ]
|
165
|
+
|
166
|
+
if !found_likely_money_column
|
167
|
+
found_likely_double_money_columns = false
|
168
|
+
0.upto(columns.length - 2) do |i|
|
169
|
+
if MoneyColumn.new( columns[i] ).merge!( MoneyColumn.new( columns[i+1] ) )
|
170
|
+
_, found_likely_double_money_columns = evaluate_columns(merge_columns(i, i+1))
|
171
|
+
if found_likely_double_money_columns
|
172
|
+
found_double_money_column( i, i + 1 )
|
173
|
+
break
|
174
|
+
end
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
if !found_likely_double_money_columns
|
179
|
+
0.upto(columns.length - 2) do |i|
|
180
|
+
if MoneyColumn.new( columns[i] ).merge!( MoneyColumn.new( columns[i+1] ) )
|
181
|
+
# Try a more specific test
|
182
|
+
_, found_likely_double_money_columns = evaluate_two_money_columns( columns, i, i+1, results )
|
183
|
+
if found_likely_double_money_columns
|
184
|
+
found_double_money_column( i, i + 1 )
|
185
|
+
break
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
if !found_likely_double_money_columns && !settings[:testing]
|
192
|
+
puts "I didn't find a high-likelyhood money column, but I'm taking my best guess with column #{money_column_indices.first + 1}."
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
results.reject! {|i| money_column_indices.include?(i[:index]) }
|
197
|
+
self.date_column_index = results.sort { |a, b| b[:date_score] <=> a[:date_score] }.first[:index]
|
198
|
+
results.reject! {|i| i[:index] == date_column_index }
|
199
|
+
@date_column = DateColumn.new( columns[ self.date_column_index ], @options )
|
200
|
+
|
201
|
+
if ( money_column_indices.length == 1 )
|
202
|
+
@money_column = MoneyColumn.new( columns[money_column_indices[0]],
|
203
|
+
@options )
|
204
|
+
detect_sign_column if @money_column.positive?
|
205
|
+
else
|
206
|
+
@money_column = MoneyColumn.new( columns[money_column_indices[0]],
|
207
|
+
@options )
|
208
|
+
@money_column.merge!(
|
209
|
+
MoneyColumn.new( columns[money_column_indices[1]], @options ) )
|
210
|
+
end
|
211
|
+
|
212
|
+
self.description_column_indices = results.map { |i| i[:index] }
|
213
|
+
end
|
214
|
+
|
215
|
+
def columns
|
216
|
+
@columns ||= begin
|
217
|
+
last_row_length = nil
|
218
|
+
csv_data.inject([]) do |memo, row|
|
219
|
+
# fail "Input CSV must have consistent row lengths." if last_row_length && row.length != last_row_length
|
220
|
+
unless row.all? { |i| i.nil? || i.length == 0 }
|
221
|
+
row.each_with_index do |entry, index|
|
222
|
+
memo[index] ||= []
|
223
|
+
memo[index] << (entry || '').strip
|
224
|
+
end
|
225
|
+
last_row_length = row.length
|
226
|
+
end
|
227
|
+
memo
|
228
|
+
end
|
229
|
+
end
|
230
|
+
end
|
231
|
+
|
232
|
+
def parse
|
233
|
+
data = options[:string] || File.read(options[:file])
|
234
|
+
|
235
|
+
if RUBY_VERSION =~ /^1\.9/ || RUBY_VERSION =~ /^2/
|
236
|
+
data = data.force_encoding(options[:encoding] || 'BINARY').encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?')
|
237
|
+
csv_engine = CSV
|
238
|
+
else
|
239
|
+
csv_engine = FasterCSV
|
240
|
+
end
|
241
|
+
|
242
|
+
@csv_data = csv_engine.parse data.strip, :col_sep => options[:csv_separator] || ','
|
243
|
+
if options[:contains_header]
|
244
|
+
options[:contains_header].times { csv_data.shift }
|
245
|
+
end
|
246
|
+
csv_data
|
247
|
+
end
|
248
|
+
|
249
|
+
@settings = { :testing => false }
|
250
|
+
|
251
|
+
def self.settings
|
252
|
+
@settings
|
253
|
+
end
|
254
|
+
|
255
|
+
def settings
|
256
|
+
self.class.settings
|
257
|
+
end
|
258
|
+
end
|
259
|
+
end
|