reckon 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -0
- data/.rubocop.yml +20 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/Rakefile +2 -2
- data/bin/build-new-version.sh +3 -2
- data/bin/reckon +1 -1
- data/lib/reckon/app.rb +27 -24
- data/lib/reckon/beancount_parser.rb +150 -0
- data/lib/reckon/cosine_similarity.rb +0 -1
- data/lib/reckon/csv_parser.rb +73 -37
- data/lib/reckon/date_column.rb +18 -7
- data/lib/reckon/ledger_parser.rb +23 -15
- data/lib/reckon/money.rb +18 -16
- data/lib/reckon/options.rb +44 -19
- data/lib/reckon/version.rb +1 -1
- data/lib/reckon.rb +1 -0
- data/spec/cosine_training_and_test.rb +1 -1
- data/spec/data_fixtures/multi-line-field.csv +5 -0
- data/spec/integration/invalid_header_example/output.ledger +6 -7
- data/spec/integration/invalid_header_example/test_args +1 -1
- data/spec/integration/tab_delimited_file/input.csv +2 -0
- data/spec/integration/tab_delimited_file/output.ledger +8 -0
- data/spec/integration/tab_delimited_file/test_args +1 -0
- data/spec/reckon/csv_parser_spec.rb +85 -26
- data/spec/reckon/date_column_spec.rb +6 -0
- data/spec/reckon/ledger_parser_spec.rb +25 -23
- data/spec/reckon/options_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 03c20b48d4333969c8304a5bb9a3c01fc6053050ab9146329ce14ae6a9886b38
|
4
|
+
data.tar.gz: 27a2ce4e8db5c7818cc4cefb19f180a7c727190f0a990403f565fad503e749a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f569b3d5cf4038714065a6d184d6c07f57d10598e5efc610eeb9919e8b18c65aff5e5329ab89a9ed30f72cabce9d11f5645af4d0df3bda6d05ad9afd988f7e7
|
7
|
+
data.tar.gz: 1783a63ba138c2b87a0756d6b9bcfbce068daf977e582a4c920a37ff50358328f8514f308dbbf932ef5cc4111e9e52dadfaed5876b9d30f4759d4a1eb31299fa
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Layout/LineLength:
|
2
|
+
Max: 88
|
3
|
+
|
4
|
+
Style/StringLiterals:
|
5
|
+
Enabled: false
|
6
|
+
|
7
|
+
Style/RedundantReturn:
|
8
|
+
Enabled: false
|
9
|
+
|
10
|
+
Metrics/ClassLength:
|
11
|
+
Enabled: False
|
12
|
+
|
13
|
+
Metrics/MethodLength:
|
14
|
+
Enabled: False
|
15
|
+
|
16
|
+
Metrics/AbcSize:
|
17
|
+
Enabled: False
|
18
|
+
|
19
|
+
Style/NumericPredicate:
|
20
|
+
Enabled: False
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v0.9.1](https://github.com/cantino/reckon/tree/v0.9.1) (2023-03-19)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0...v0.9.1)
|
6
|
+
|
7
|
+
**Closed issues:**
|
8
|
+
|
9
|
+
- More than one column support [\#120](https://github.com/cantino/reckon/issues/120)
|
10
|
+
- Beancount support [\#119](https://github.com/cantino/reckon/issues/119)
|
11
|
+
- Problem with importing CSV [\#60](https://github.com/cantino/reckon/issues/60)
|
12
|
+
|
3
13
|
## [v0.9.0](https://github.com/cantino/reckon/tree/v0.9.0) (2023-02-23)
|
4
14
|
|
5
15
|
[Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0-beta...v0.9.0)
|
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -13,10 +13,10 @@ task :test_all do
|
|
13
13
|
puts "Running unit tests"
|
14
14
|
Rake::Task["spec"].invoke
|
15
15
|
puts "Running integration tests"
|
16
|
-
Rake::Task["
|
16
|
+
Rake::Task["test_integration"].invoke
|
17
17
|
end
|
18
18
|
|
19
|
-
task :
|
19
|
+
task :test_integration do
|
20
20
|
cmd = 'prove -v ./spec/integration/test.sh'
|
21
21
|
raise 'Integration tests failed' unless system(cmd)
|
22
22
|
end
|
data/bin/build-new-version.sh
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
|
3
|
-
set -
|
3
|
+
set -xe
|
4
4
|
|
5
5
|
VERSION=$1
|
6
6
|
|
@@ -8,7 +8,7 @@ echo "Install github_changelog_generator"
|
|
8
8
|
gem install --user github_changelog_generator
|
9
9
|
|
10
10
|
echo "Update 'lib/reckon/version.rb'"
|
11
|
-
echo -e "module Reckon\n VERSION
|
11
|
+
echo -e "module Reckon\n VERSION = \"$VERSION\"\nend" > lib/reckon/version.rb
|
12
12
|
echo "Run `bundle install` to build updated Gemfile.lock"
|
13
13
|
bundle install
|
14
14
|
echo "Run changelog generator (requires $TOKEN to be your github token)"
|
@@ -24,3 +24,4 @@ echo "Push changes and tags"
|
|
24
24
|
echo "git push && git push --tags"
|
25
25
|
echo "Push new gem"
|
26
26
|
echo "gem push reckon-$VERSION.gem"
|
27
|
+
gh release create v$VERSION reckon-$VERSION.gem --draft --generate-notes
|
data/bin/reckon
CHANGED
data/lib/reckon/app.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'pp'
|
4
3
|
require 'yaml'
|
4
|
+
require 'stringio'
|
5
5
|
|
6
6
|
module Reckon
|
7
|
+
# The main app
|
7
8
|
class App
|
8
9
|
attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
|
9
|
-
@@cli = HighLine.new
|
10
10
|
|
11
11
|
def initialize(opts = {})
|
12
12
|
self.options = opts
|
@@ -14,9 +14,10 @@ module Reckon
|
|
14
14
|
|
15
15
|
self.regexps = {}
|
16
16
|
self.seen = Set.new
|
17
|
-
|
18
|
-
@csv_parser = CSVParser.new(
|
17
|
+
@cli = HighLine.new
|
18
|
+
@csv_parser = CSVParser.new(options)
|
19
19
|
@matcher = CosineSimilarity.new(options)
|
20
|
+
@parser = options[:format] =~ /beancount/i ? BeancountParser.new : LedgerParser.new
|
20
21
|
learn!
|
21
22
|
end
|
22
23
|
|
@@ -26,9 +27,13 @@ module Reckon
|
|
26
27
|
fh.puts str
|
27
28
|
end
|
28
29
|
|
30
|
+
# Learn from previous transactions. Used to recommend accounts for a transaction.
|
29
31
|
def learn!
|
30
32
|
learn_from_account_tokens(options[:account_tokens_file])
|
31
33
|
learn_from_ledger_file(options[:existing_ledger_file])
|
34
|
+
# TODO: make this work
|
35
|
+
# this doesn't work because output_file is an IO object
|
36
|
+
# learn_from_ledger_file(options[:output_file]) if File.exist?(options[:output_file])
|
32
37
|
end
|
33
38
|
|
34
39
|
def learn_from_account_tokens(filename)
|
@@ -52,12 +57,13 @@ module Reckon
|
|
52
57
|
|
53
58
|
raise "#{ledger_file} doesn't exist!" unless File.exist?(ledger_file)
|
54
59
|
|
55
|
-
learn_from_ledger(File.
|
60
|
+
learn_from_ledger(File.new(ledger_file))
|
56
61
|
end
|
57
62
|
|
63
|
+
# Takes an IO-like object
|
58
64
|
def learn_from_ledger(ledger)
|
59
65
|
LOGGER.info "learning from #{ledger}"
|
60
|
-
|
66
|
+
@parser.parse(ledger).each do |entry|
|
61
67
|
entry[:accounts].each do |account|
|
62
68
|
str = [entry[:desc], account[:amount]].join(" ")
|
63
69
|
if account[:name] != options[:bank_account]
|
@@ -84,7 +90,7 @@ module Reckon
|
|
84
90
|
merged_acct = [account, k].compact.join(':')
|
85
91
|
extract_account_tokens(v, merged_acct)
|
86
92
|
end
|
87
|
-
at.inject({}) { |memo, e| memo.merge!(e)}
|
93
|
+
at.inject({}) { |memo, e| memo.merge!(e) }
|
88
94
|
end
|
89
95
|
end
|
90
96
|
|
@@ -92,6 +98,7 @@ module Reckon
|
|
92
98
|
# https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
|
93
99
|
match = regex_str.match(/^\/(.*)\/([ix]*)$/m)
|
94
100
|
fail "failed to parse regexp #{regex_str}" unless match
|
101
|
+
|
95
102
|
options = 0
|
96
103
|
(match[2] || '').split('').each do |option|
|
97
104
|
case option
|
@@ -120,13 +127,16 @@ module Reckon
|
|
120
127
|
|
121
128
|
if row[:money] > 0
|
122
129
|
# out_of_account
|
123
|
-
answer = ask_account_question(
|
130
|
+
answer = ask_account_question(
|
131
|
+
"Which account provided this income? (#{cmd_options})", row
|
132
|
+
)
|
124
133
|
line1 = [options[:bank_account], row[:pretty_money]]
|
125
134
|
line2 = [answer, ""]
|
126
135
|
else
|
127
136
|
# into_account
|
128
|
-
answer = ask_account_question(
|
129
|
-
|
137
|
+
answer = ask_account_question(
|
138
|
+
"To which account did this money go? (#{cmd_options})", row
|
139
|
+
)
|
130
140
|
line1 = [answer, ""]
|
131
141
|
line2 = [options[:bank_account], row[:pretty_money]]
|
132
142
|
end
|
@@ -137,9 +147,9 @@ module Reckon
|
|
137
147
|
next
|
138
148
|
end
|
139
149
|
|
140
|
-
ledger =
|
150
|
+
ledger = @parser.format_row(row, line1, line2)
|
141
151
|
LOGGER.info "ledger line: #{ledger}"
|
142
|
-
learn_from_ledger(ledger) unless options[:account_tokens_file]
|
152
|
+
learn_from_ledger(StringIO.new(ledger)) unless options[:account_tokens_file]
|
143
153
|
output(ledger)
|
144
154
|
end
|
145
155
|
end
|
@@ -203,7 +213,7 @@ module Reckon
|
|
203
213
|
return possible_answers[0] || default
|
204
214
|
end
|
205
215
|
|
206
|
-
answer =
|
216
|
+
answer = @cli.ask(msg) do |q|
|
207
217
|
q.completion = possible_answers
|
208
218
|
q.readline = true
|
209
219
|
q.default = possible_answers.first
|
@@ -221,7 +231,7 @@ module Reckon
|
|
221
231
|
end
|
222
232
|
|
223
233
|
def add_description(row)
|
224
|
-
desc_answer =
|
234
|
+
desc_answer = @cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
|
225
235
|
q.overwrite = true
|
226
236
|
q.readline = true
|
227
237
|
q.default = row[:description]
|
@@ -231,7 +241,7 @@ module Reckon
|
|
231
241
|
end
|
232
242
|
|
233
243
|
def add_note(row)
|
234
|
-
desc_answer =
|
244
|
+
desc_answer = @cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
|
235
245
|
q.overwrite = true
|
236
246
|
q.readline = true
|
237
247
|
q.default = row[:note]
|
@@ -246,7 +256,7 @@ module Reckon
|
|
246
256
|
[account, match[0]]
|
247
257
|
end
|
248
258
|
}.compact
|
249
|
-
matches.sort_by
|
259
|
+
matches.sort_by { |_account, matched_text| matched_text.length }.map(&:first)
|
250
260
|
end
|
251
261
|
|
252
262
|
def suggest(row)
|
@@ -254,13 +264,6 @@ module Reckon
|
|
254
264
|
@matcher.find_similar(row[:description]).map { |n| n[:account] }
|
255
265
|
end
|
256
266
|
|
257
|
-
def ledger_format(row, line1, line2)
|
258
|
-
out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
|
259
|
-
out += "\t#{line1.first}\t\t\t#{line1.last}\n"
|
260
|
-
out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
|
261
|
-
out
|
262
|
-
end
|
263
|
-
|
264
267
|
def output(ledger_line)
|
265
268
|
options[:output_file].puts ledger_line
|
266
269
|
options[:output_file].flush
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Reckon
|
5
|
+
class BeancountParser
|
6
|
+
|
7
|
+
attr_accessor :entries
|
8
|
+
|
9
|
+
def initialize(options = {})
|
10
|
+
@options = options
|
11
|
+
@date_format = options[:ledger_date_format] || options[:date_format] || '%Y-%m-%d'
|
12
|
+
end
|
13
|
+
|
14
|
+
# 2015-01-01 * "Opening Balance for checking account"
|
15
|
+
# Assets:US:BofA:Checking 3490.52 USD
|
16
|
+
# Equity:Opening-Balances -3490.52 USD
|
17
|
+
|
18
|
+
# input is an object that responds to #each_line,
|
19
|
+
# (i.e. a StringIO or an IO object)
|
20
|
+
def parse(input)
|
21
|
+
entries = []
|
22
|
+
comment_chars = ';#%*|'
|
23
|
+
new_entry = {}
|
24
|
+
|
25
|
+
input.each_line do |entry|
|
26
|
+
|
27
|
+
next if entry =~ /^\s*[#{comment_chars}]/
|
28
|
+
|
29
|
+
m = entry.match(%r{
|
30
|
+
^
|
31
|
+
(\d+[\d/-]+) # date
|
32
|
+
\s+
|
33
|
+
([*!])? # type
|
34
|
+
\s*
|
35
|
+
("[^"]*")? # description (optional)
|
36
|
+
\s*
|
37
|
+
("[^"]*")? # notes (optional)
|
38
|
+
# tags (not implemented)
|
39
|
+
}x)
|
40
|
+
|
41
|
+
# (date, type, description, notes); type, description and notes are optional
|
42
|
+
if (m)
|
43
|
+
add_entry(entries, new_entry)
|
44
|
+
new_entry = {
|
45
|
+
date: try_parse_date(m[1]),
|
46
|
+
type: m[2] || "",
|
47
|
+
desc: trim_quote(m[3]),
|
48
|
+
notes: trim_quote(m[4]),
|
49
|
+
accounts: []
|
50
|
+
}
|
51
|
+
elsif entry =~ /^\s*$/ && new_entry[:date]
|
52
|
+
add_entry(entries, new_entry)
|
53
|
+
new_entry = {}
|
54
|
+
elsif new_entry[:date] && entry =~ /^\s+/
|
55
|
+
LOGGER.info("Adding new account #{entry}")
|
56
|
+
new_entry[:accounts] << parse_account_line(entry)
|
57
|
+
else
|
58
|
+
LOGGER.info("Unknown entry type: #{entry}")
|
59
|
+
add_entry(entries, new_entry)
|
60
|
+
new_entry = {}
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
entries
|
65
|
+
end
|
66
|
+
|
67
|
+
def format_row(row, line1, line2)
|
68
|
+
out = %Q{#{row[:pretty_date]} * "#{row[:description]}" "#{row[:note]}\n}
|
69
|
+
out += "\t#{line1.first}\t\t\t#{line1.last}\n"
|
70
|
+
out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
|
71
|
+
out
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
# remove leading and trailing quote character (")
|
77
|
+
def trim_quote(str)
|
78
|
+
return str if !str
|
79
|
+
str.gsub(/^"([^"]*)"$/, '\1')
|
80
|
+
end
|
81
|
+
|
82
|
+
def add_entry(entries, entry)
|
83
|
+
return unless entry[:date] && entry[:accounts].length > 1
|
84
|
+
|
85
|
+
entry[:accounts] = balance(entry[:accounts])
|
86
|
+
entries << entry
|
87
|
+
end
|
88
|
+
|
89
|
+
def try_parse_date(date_str)
|
90
|
+
date = Date.parse(date_str)
|
91
|
+
return nil if date.year > 9999 || date.year < 1000
|
92
|
+
|
93
|
+
date
|
94
|
+
rescue ArgumentError
|
95
|
+
nil
|
96
|
+
end
|
97
|
+
|
98
|
+
def parse_account_line(entry)
|
99
|
+
# TODO handle buying stocks
|
100
|
+
# Assets:US:ETrade:VHT 19 VHT {132.32 USD, 2017-08-27}
|
101
|
+
(account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
|
102
|
+
|
103
|
+
if rest.nil? || rest.empty?
|
104
|
+
return {
|
105
|
+
name: account_name,
|
106
|
+
amount: clean_money("")
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
value = if rest =~ /{/
|
111
|
+
(qty, dollar_value, date) = rest.split(/[{,]/)
|
112
|
+
(qty.to_f * dollar_value.to_f).to_s
|
113
|
+
else
|
114
|
+
rest
|
115
|
+
end
|
116
|
+
|
117
|
+
return {
|
118
|
+
name: account_name,
|
119
|
+
amount: clean_money(value || "")
|
120
|
+
}
|
121
|
+
end
|
122
|
+
|
123
|
+
def balance(accounts)
|
124
|
+
return accounts unless accounts.any? { |i| i[:amount].nil? }
|
125
|
+
|
126
|
+
sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
|
127
|
+
count = 0
|
128
|
+
accounts.each do |account|
|
129
|
+
next unless account[:amount].nil?
|
130
|
+
|
131
|
+
count += 1
|
132
|
+
account[:amount] = -sum
|
133
|
+
end
|
134
|
+
if count > 1
|
135
|
+
puts "Warning: unparsable entry due to more than one missing money value."
|
136
|
+
p accounts
|
137
|
+
puts
|
138
|
+
end
|
139
|
+
|
140
|
+
accounts
|
141
|
+
end
|
142
|
+
|
143
|
+
def clean_money(money)
|
144
|
+
return nil if money.nil? || money.empty?
|
145
|
+
|
146
|
+
money.gsub(/[^0-9.-]/, '').to_f
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
data/lib/reckon/csv_parser.rb
CHANGED
@@ -1,32 +1,28 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
2
4
|
|
3
5
|
module Reckon
|
6
|
+
# Parses CSV files
|
4
7
|
class CSVParser
|
5
|
-
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
|
8
|
+
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
|
9
|
+
:description_column_indices, :money_column, :date_column
|
6
10
|
|
7
11
|
def initialize(options = {})
|
8
12
|
self.options = options
|
13
|
+
|
14
|
+
self.options[:csv_separator] = "\t" if options[:csv_separator] == '\t'
|
9
15
|
self.options[:currency] ||= '$'
|
16
|
+
|
17
|
+
# we convert to a string so we can do character encoding cleanup
|
10
18
|
@csv_data = parse(options[:string] || File.read(options[:file]), options[:file])
|
11
19
|
filter_csv
|
12
20
|
detect_columns
|
13
21
|
end
|
14
22
|
|
23
|
+
# transpose csv_data (array of rows) to an array of columns
|
15
24
|
def columns
|
16
|
-
@columns ||=
|
17
|
-
begin
|
18
|
-
last_row_length = nil
|
19
|
-
csv_data.inject([]) do |memo, row|
|
20
|
-
unless row.all? { |i| i.nil? || i.length == 0 }
|
21
|
-
row.each_with_index do |entry, index|
|
22
|
-
memo[index] ||= []
|
23
|
-
memo[index] << (entry || '').strip
|
24
|
-
end
|
25
|
-
last_row_length = row.length
|
26
|
-
end
|
27
|
-
memo
|
28
|
-
end
|
29
|
-
end
|
25
|
+
@columns ||= @csv_data[0].zip(*@csv_data[1..])
|
30
26
|
end
|
31
27
|
|
32
28
|
def date_for(index)
|
@@ -34,7 +30,7 @@ module Reckon
|
|
34
30
|
end
|
35
31
|
|
36
32
|
def pretty_date_for(index)
|
37
|
-
@date_column.pretty_for(
|
33
|
+
@date_column.pretty_for(index)
|
38
34
|
end
|
39
35
|
|
40
36
|
def money_for(index)
|
@@ -42,7 +38,7 @@ module Reckon
|
|
42
38
|
end
|
43
39
|
|
44
40
|
def pretty_money(amount, negate = false)
|
45
|
-
Money.new(
|
41
|
+
Money.new(amount, @options).pretty(negate)
|
46
42
|
end
|
47
43
|
|
48
44
|
def pretty_money_for(index, negate = false)
|
@@ -54,11 +50,11 @@ module Reckon
|
|
54
50
|
|
55
51
|
def description_for(index)
|
56
52
|
description_column_indices.map { |i| columns[i][index].to_s.strip }
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
.reject(&:empty?)
|
54
|
+
.join("; ")
|
55
|
+
.squeeze(" ")
|
56
|
+
.gsub(/(;\s+){2,}/, '')
|
57
|
+
.strip
|
62
58
|
end
|
63
59
|
|
64
60
|
def row(index)
|
@@ -84,9 +80,10 @@ module Reckon
|
|
84
80
|
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
85
81
|
last = nil
|
86
82
|
column.reverse.each_with_index do |entry, row_from_bottom|
|
83
|
+
entry ||= "" # entries can be nil
|
87
84
|
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
88
85
|
entry = entry.strip
|
89
|
-
money_score += Money::likelihood(
|
86
|
+
money_score += Money::likelihood(entry)
|
90
87
|
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
91
88
|
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
92
89
|
date_score += DateColumn.likelihood(entry)
|
@@ -97,8 +94,8 @@ module Reckon
|
|
97
94
|
row.each do |row_entry|
|
98
95
|
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
99
96
|
if row_entry != 0 && last + row_entry == entry_as_num
|
100
|
-
|
101
|
-
|
97
|
+
money_score -= 10
|
98
|
+
break
|
102
99
|
end
|
103
100
|
end
|
104
101
|
end
|
@@ -110,7 +107,8 @@ module Reckon
|
|
110
107
|
found_likely_money_column = true
|
111
108
|
end
|
112
109
|
|
113
|
-
results << { :index => index, :money_score => money_score,
|
110
|
+
results << { :index => index, :money_score => money_score,
|
111
|
+
:date_score => date_score }
|
114
112
|
end
|
115
113
|
|
116
114
|
results.sort_by! { |n| -n[:money_score] }
|
@@ -129,14 +127,15 @@ module Reckon
|
|
129
127
|
# In some CSV files, negative/positive amounts are indicated in a separate column
|
130
128
|
def detect_sign_column
|
131
129
|
return if columns[0].length <= 2 # This test requires more than two rows; otherwise it will lead to false positives
|
130
|
+
|
132
131
|
signs = []
|
133
132
|
if @money_column_indices[0] > 0
|
134
|
-
column = columns[
|
133
|
+
column = columns[@money_column_indices[0] - 1]
|
135
134
|
signs = column.uniq
|
136
135
|
end
|
137
136
|
if (signs.length != 2 &&
|
138
137
|
(@money_column_indices[0] + 1 < columns.length))
|
139
|
-
column = columns[
|
138
|
+
column = columns[@money_column_indices[0] + 1]
|
140
139
|
signs = column.uniq
|
141
140
|
end
|
142
141
|
if signs.length == 2
|
@@ -166,15 +165,19 @@ module Reckon
|
|
166
165
|
self.money_column_indices = [options[:money_column] - 1]
|
167
166
|
elsif options[:money_columns].length == 2
|
168
167
|
in_col, out_col = options[:money_columns]
|
169
|
-
self.money_column_indices = [in_col -1, out_col -1]
|
168
|
+
self.money_column_indices = [in_col - 1, out_col - 1]
|
170
169
|
else
|
171
170
|
puts "Unable to determine money columns, use --money-columns to specify the 1 or 2 column(s) reckon should use."
|
172
171
|
end
|
173
172
|
|
174
173
|
# If no money_column(s) argument is supplied, try to automatically infer money_column(s)
|
175
174
|
else
|
176
|
-
self.money_column_indices = results.select { |n|
|
175
|
+
self.money_column_indices = results.select { |n|
|
176
|
+
n[:is_money_column]
|
177
|
+
}.map { |n| n[:index] }
|
177
178
|
if self.money_column_indices.length == 1
|
179
|
+
# TODO: print the unfiltered column number, not the filtered
|
180
|
+
# ie if money column is 7, but we ignore columns 4 and 5, this prints "Using column 5 as the money column"
|
178
181
|
puts "Using column #{money_column_indices.first + 1} as the money column. Use --money-colum to specify a different one."
|
179
182
|
elsif self.money_column_indices.length == 2
|
180
183
|
puts "Using columns #{money_column_indices[0] + 1} and #{money_column_indices[1] + 1} as money column. Use --money-columns to specify different ones."
|
@@ -204,20 +207,53 @@ module Reckon
|
|
204
207
|
self.description_column_indices = results.map { |i| i[:index] }
|
205
208
|
end
|
206
209
|
|
207
|
-
def parse(data, filename=nil)
|
210
|
+
def parse(data, filename = nil)
|
208
211
|
# Use force_encoding to convert the string to utf-8 with as few invalid characters
|
209
212
|
# as possible.
|
210
213
|
data.force_encoding(try_encoding(data, filename))
|
211
214
|
data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
212
215
|
data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists
|
213
216
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
217
|
+
separator = options[:csv_separator] || guess_column_separator(data)
|
218
|
+
header_lines_to_skip = options[:contains_header] || 0
|
219
|
+
# used as a negative end index for the row slice: -1 keeps every row (skips 0 footer rows)
|
220
|
+
footer_lines_to_skip = (options[:contains_footer] || 0) + 1
|
221
|
+
|
222
|
+
# convert to a stringio object to handle multi-line fields
|
223
|
+
parser_opts = {
|
224
|
+
col_sep: separator,
|
225
|
+
skip_blanks: true
|
226
|
+
}
|
227
|
+
begin
|
228
|
+
rows = CSV.parse(StringIO.new(data), **parser_opts)
|
229
|
+
rows[header_lines_to_skip..-footer_lines_to_skip]
|
230
|
+
rescue CSV::MalformedCSVError
|
231
|
+
# try removing N header lines before parsing
|
232
|
+
index = 0
|
233
|
+
count = 0
|
234
|
+
while count < header_lines_to_skip
|
235
|
+
index = data.index("\n", index) + 1 # skip over newline character
|
236
|
+
count += 1
|
237
|
+
end
|
238
|
+
rows = CSV.parse(StringIO.new(data[index..-1]), **parser_opts)
|
239
|
+
rows[0..-footer_lines_to_skip]
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
def guess_column_separator(data)
|
244
|
+
delimiters = [',', "\t", ';', ':', '|']
|
245
|
+
|
246
|
+
counts = [0] * delimiters.length
|
247
|
+
|
248
|
+
data.each_line do |line|
|
249
|
+
delimiters.each_with_index do |delim, i|
|
250
|
+
counts[i] += line.count(delim)
|
251
|
+
end
|
218
252
|
end
|
219
253
|
|
220
|
-
|
254
|
+
LOGGER.info("guessing #{delimiters[counts.index(counts.max)]} as csv separator")
|
255
|
+
|
256
|
+
delimiters[counts.index(counts.max)]
|
221
257
|
end
|
222
258
|
|
223
259
|
def try_encoding(data, filename = nil)
|