reckon 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +2 -0
- data/.rubocop.yml +20 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/Rakefile +2 -2
- data/bin/build-new-version.sh +3 -2
- data/bin/reckon +1 -1
- data/lib/reckon/app.rb +27 -24
- data/lib/reckon/beancount_parser.rb +150 -0
- data/lib/reckon/cosine_similarity.rb +0 -1
- data/lib/reckon/csv_parser.rb +73 -37
- data/lib/reckon/date_column.rb +18 -7
- data/lib/reckon/ledger_parser.rb +23 -15
- data/lib/reckon/money.rb +18 -16
- data/lib/reckon/options.rb +44 -19
- data/lib/reckon/version.rb +1 -1
- data/lib/reckon.rb +1 -0
- data/spec/cosine_training_and_test.rb +1 -1
- data/spec/data_fixtures/multi-line-field.csv +5 -0
- data/spec/integration/invalid_header_example/output.ledger +6 -7
- data/spec/integration/invalid_header_example/test_args +1 -1
- data/spec/integration/tab_delimited_file/input.csv +2 -0
- data/spec/integration/tab_delimited_file/output.ledger +8 -0
- data/spec/integration/tab_delimited_file/test_args +1 -0
- data/spec/reckon/csv_parser_spec.rb +85 -26
- data/spec/reckon/date_column_spec.rb +6 -0
- data/spec/reckon/ledger_parser_spec.rb +25 -23
- data/spec/reckon/options_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -0
- metadata +8 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 03c20b48d4333969c8304a5bb9a3c01fc6053050ab9146329ce14ae6a9886b38
|
4
|
+
data.tar.gz: 27a2ce4e8db5c7818cc4cefb19f180a7c727190f0a990403f565fad503e749a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2f569b3d5cf4038714065a6d184d6c07f57d10598e5efc610eeb9919e8b18c65aff5e5329ab89a9ed30f72cabce9d11f5645af4d0df3bda6d05ad9afd988f7e7
|
7
|
+
data.tar.gz: 1783a63ba138c2b87a0756d6b9bcfbce068daf977e582a4c920a37ff50358328f8514f308dbbf932ef5cc4111e9e52dadfaed5876b9d30f4759d4a1eb31299fa
|
data/.github/workflows/ruby.yml
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Layout/LineLength:
|
2
|
+
Max: 88
|
3
|
+
|
4
|
+
Style/StringLiterals:
|
5
|
+
Enabled: false
|
6
|
+
|
7
|
+
Style/RedundantReturn:
|
8
|
+
Enabled: false
|
9
|
+
|
10
|
+
Metrics/ClassLength:
|
11
|
+
Enabled: False
|
12
|
+
|
13
|
+
Metrics/MethodLength:
|
14
|
+
Enabled: False
|
15
|
+
|
16
|
+
Metrics/AbcSize:
|
17
|
+
Enabled: False
|
18
|
+
|
19
|
+
Style/NumericPredicate:
|
20
|
+
Enabled: False
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v0.9.1](https://github.com/cantino/reckon/tree/v0.9.1) (2023-03-19)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0...v0.9.1)
|
6
|
+
|
7
|
+
**Closed issues:**
|
8
|
+
|
9
|
+
- More than one column support [\#120](https://github.com/cantino/reckon/issues/120)
|
10
|
+
- Beancount support [\#119](https://github.com/cantino/reckon/issues/119)
|
11
|
+
- Problem with importing CSV [\#60](https://github.com/cantino/reckon/issues/60)
|
12
|
+
|
3
13
|
## [v0.9.0](https://github.com/cantino/reckon/tree/v0.9.0) (2023-02-23)
|
4
14
|
|
5
15
|
[Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0-beta...v0.9.0)
|
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -13,10 +13,10 @@ task :test_all do
|
|
13
13
|
puts "Running unit tests"
|
14
14
|
Rake::Task["spec"].invoke
|
15
15
|
puts "Running integration tests"
|
16
|
-
Rake::Task["
|
16
|
+
Rake::Task["test_integration"].invoke
|
17
17
|
end
|
18
18
|
|
19
|
-
task :
|
19
|
+
task :test_integration do
|
20
20
|
cmd = 'prove -v ./spec/integration/test.sh'
|
21
21
|
raise 'Integration tests failed' unless system(cmd)
|
22
22
|
end
|
data/bin/build-new-version.sh
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#!/bin/bash
|
2
2
|
|
3
|
-
set -
|
3
|
+
set -xe
|
4
4
|
|
5
5
|
VERSION=$1
|
6
6
|
|
@@ -8,7 +8,7 @@ echo "Install github_changelog_generator"
|
|
8
8
|
gem install --user github_changelog_generator
|
9
9
|
|
10
10
|
echo "Update 'lib/reckon/version.rb'"
|
11
|
-
echo -e "module Reckon\n VERSION
|
11
|
+
echo -e "module Reckon\n VERSION = \"$VERSION\"\nend" > lib/reckon/version.rb
|
12
12
|
echo "Run `bundle install` to build updated Gemfile.lock"
|
13
13
|
bundle install
|
14
14
|
echo "Run changelog generator (requires $TOKEN to be your github token)"
|
@@ -24,3 +24,4 @@ echo "Push changes and tags"
|
|
24
24
|
echo "git push && git push --tags"
|
25
25
|
echo "Push new gem"
|
26
26
|
echo "gem push reckon-$VERSION.gem"
|
27
|
+
gh release create v$VERSION reckon-$VERSION.gem --draft --generate-notes
|
data/bin/reckon
CHANGED
data/lib/reckon/app.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'pp'
|
4
3
|
require 'yaml'
|
4
|
+
require 'stringio'
|
5
5
|
|
6
6
|
module Reckon
|
7
|
+
# The main app
|
7
8
|
class App
|
8
9
|
attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
|
9
|
-
@@cli = HighLine.new
|
10
10
|
|
11
11
|
def initialize(opts = {})
|
12
12
|
self.options = opts
|
@@ -14,9 +14,10 @@ module Reckon
|
|
14
14
|
|
15
15
|
self.regexps = {}
|
16
16
|
self.seen = Set.new
|
17
|
-
|
18
|
-
@csv_parser = CSVParser.new(
|
17
|
+
@cli = HighLine.new
|
18
|
+
@csv_parser = CSVParser.new(options)
|
19
19
|
@matcher = CosineSimilarity.new(options)
|
20
|
+
@parser = options[:format] =~ /beancount/i ? BeancountParser.new : LedgerParser.new
|
20
21
|
learn!
|
21
22
|
end
|
22
23
|
|
@@ -26,9 +27,13 @@ module Reckon
|
|
26
27
|
fh.puts str
|
27
28
|
end
|
28
29
|
|
30
|
+
# Learn from previous transactions. Used to recommend accounts for a transaction.
|
29
31
|
def learn!
|
30
32
|
learn_from_account_tokens(options[:account_tokens_file])
|
31
33
|
learn_from_ledger_file(options[:existing_ledger_file])
|
34
|
+
# TODO: make this work
|
35
|
+
# this doesn't work because output_file is an IO object
|
36
|
+
# learn_from_ledger_file(options[:output_file]) if File.exist?(options[:output_file])
|
32
37
|
end
|
33
38
|
|
34
39
|
def learn_from_account_tokens(filename)
|
@@ -52,12 +57,13 @@ module Reckon
|
|
52
57
|
|
53
58
|
raise "#{ledger_file} doesn't exist!" unless File.exist?(ledger_file)
|
54
59
|
|
55
|
-
learn_from_ledger(File.
|
60
|
+
learn_from_ledger(File.new(ledger_file))
|
56
61
|
end
|
57
62
|
|
63
|
+
# Takes an IO-like object
|
58
64
|
def learn_from_ledger(ledger)
|
59
65
|
LOGGER.info "learning from #{ledger}"
|
60
|
-
|
66
|
+
@parser.parse(ledger).each do |entry|
|
61
67
|
entry[:accounts].each do |account|
|
62
68
|
str = [entry[:desc], account[:amount]].join(" ")
|
63
69
|
if account[:name] != options[:bank_account]
|
@@ -84,7 +90,7 @@ module Reckon
|
|
84
90
|
merged_acct = [account, k].compact.join(':')
|
85
91
|
extract_account_tokens(v, merged_acct)
|
86
92
|
end
|
87
|
-
at.inject({}) { |memo, e| memo.merge!(e)}
|
93
|
+
at.inject({}) { |memo, e| memo.merge!(e) }
|
88
94
|
end
|
89
95
|
end
|
90
96
|
|
@@ -92,6 +98,7 @@ module Reckon
|
|
92
98
|
# https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
|
93
99
|
match = regex_str.match(/^\/(.*)\/([ix]*)$/m)
|
94
100
|
fail "failed to parse regexp #{regex_str}" unless match
|
101
|
+
|
95
102
|
options = 0
|
96
103
|
(match[2] || '').split('').each do |option|
|
97
104
|
case option
|
@@ -120,13 +127,16 @@ module Reckon
|
|
120
127
|
|
121
128
|
if row[:money] > 0
|
122
129
|
# out_of_account
|
123
|
-
answer = ask_account_question(
|
130
|
+
answer = ask_account_question(
|
131
|
+
"Which account provided this income? (#{cmd_options})", row
|
132
|
+
)
|
124
133
|
line1 = [options[:bank_account], row[:pretty_money]]
|
125
134
|
line2 = [answer, ""]
|
126
135
|
else
|
127
136
|
# into_account
|
128
|
-
answer = ask_account_question(
|
129
|
-
|
137
|
+
answer = ask_account_question(
|
138
|
+
"To which account did this money go? (#{cmd_options})", row
|
139
|
+
)
|
130
140
|
line1 = [answer, ""]
|
131
141
|
line2 = [options[:bank_account], row[:pretty_money]]
|
132
142
|
end
|
@@ -137,9 +147,9 @@ module Reckon
|
|
137
147
|
next
|
138
148
|
end
|
139
149
|
|
140
|
-
ledger =
|
150
|
+
ledger = @parser.format_row(row, line1, line2)
|
141
151
|
LOGGER.info "ledger line: #{ledger}"
|
142
|
-
learn_from_ledger(ledger) unless options[:account_tokens_file]
|
152
|
+
learn_from_ledger(StringIO.new(ledger)) unless options[:account_tokens_file]
|
143
153
|
output(ledger)
|
144
154
|
end
|
145
155
|
end
|
@@ -203,7 +213,7 @@ module Reckon
|
|
203
213
|
return possible_answers[0] || default
|
204
214
|
end
|
205
215
|
|
206
|
-
answer =
|
216
|
+
answer = @cli.ask(msg) do |q|
|
207
217
|
q.completion = possible_answers
|
208
218
|
q.readline = true
|
209
219
|
q.default = possible_answers.first
|
@@ -221,7 +231,7 @@ module Reckon
|
|
221
231
|
end
|
222
232
|
|
223
233
|
def add_description(row)
|
224
|
-
desc_answer =
|
234
|
+
desc_answer = @cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
|
225
235
|
q.overwrite = true
|
226
236
|
q.readline = true
|
227
237
|
q.default = row[:description]
|
@@ -231,7 +241,7 @@ module Reckon
|
|
231
241
|
end
|
232
242
|
|
233
243
|
def add_note(row)
|
234
|
-
desc_answer =
|
244
|
+
desc_answer = @cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
|
235
245
|
q.overwrite = true
|
236
246
|
q.readline = true
|
237
247
|
q.default = row[:note]
|
@@ -246,7 +256,7 @@ module Reckon
|
|
246
256
|
[account, match[0]]
|
247
257
|
end
|
248
258
|
}.compact
|
249
|
-
matches.sort_by
|
259
|
+
matches.sort_by { |_account, matched_text| matched_text.length }.map(&:first)
|
250
260
|
end
|
251
261
|
|
252
262
|
def suggest(row)
|
@@ -254,13 +264,6 @@ module Reckon
|
|
254
264
|
@matcher.find_similar(row[:description]).map { |n| n[:account] }
|
255
265
|
end
|
256
266
|
|
257
|
-
def ledger_format(row, line1, line2)
|
258
|
-
out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
|
259
|
-
out += "\t#{line1.first}\t\t\t#{line1.last}\n"
|
260
|
-
out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
|
261
|
-
out
|
262
|
-
end
|
263
|
-
|
264
267
|
def output(ledger_line)
|
265
268
|
options[:output_file].puts ledger_line
|
266
269
|
options[:output_file].flush
|
@@ -0,0 +1,150 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Reckon
  # Reads and writes a small subset of the Beancount journal format:
  # dated transaction headers followed by indented posting lines, e.g.
  #
  #   2015-01-01 * "Opening Balance for checking account"
  #     Assets:US:BofA:Checking         3490.52 USD
  #     Equity:Opening-Balances        -3490.52 USD
  #
  # Tags and other directives are not interpreted.
  class BeancountParser
    # A line is skipped entirely when its first non-blank character is one of
    # these characters.
    COMMENT_LINE = /^\s*[;#%*|]/.freeze

    # Header line of a transaction: date, optional flag and up to two quoted
    # strings (stored as :desc and :notes).
    TRANSACTION_HEADER = %r{
      ^
      (\d+[\d/-]+) # date
      \s+
      ([*!])?      # type
      \s*
      ("[^"]*")?   # description (optional)
      \s*
      ("[^"]*")?   # notes (optional)
                   # tags (not implemented)
    }x.freeze

    attr_accessor :entries

    # options - Hash; :ledger_date_format / :date_format are recorded in
    #           @date_format (default '%Y-%m-%d'). Note that the parsing code
    #           in this class relies on Date.parse and does not consult it.
    def initialize(options = {})
      @options = options
      @date_format = options[:ledger_date_format] || options[:date_format] || '%Y-%m-%d'
    end

    # Parses a journal into an array of entry hashes.
    #
    # input - an object that responds to #each_line (e.g. a StringIO, an IO
    #         object or a String).
    #
    # Returns an Array of Hashes with the keys :date (Date or nil), :type,
    # :desc, :notes and :accounts (Array of { name:, amount: } hashes). Only
    # entries with a parsable date and at least two postings are returned.
    def parse(input)
      entries = []
      new_entry = {}

      input.each_line do |line|
        next if line.match?(COMMENT_LINE)

        header = line.match(TRANSACTION_HEADER)

        if header
          # A new header closes whatever transaction was being collected.
          add_entry(entries, new_entry)
          new_entry = {
            date: try_parse_date(header[1]),
            type: header[2] || "",
            desc: trim_quote(header[3]),
            notes: trim_quote(header[4]),
            accounts: []
          }
        elsif line =~ /^\s*$/ && new_entry[:date]
          # A blank line terminates the current transaction.
          add_entry(entries, new_entry)
          new_entry = {}
        elsif new_entry[:date] && line =~ /^\s+/
          LOGGER.info("Adding new account #{line}")
          new_entry[:accounts] << parse_account_line(line)
        else
          LOGGER.info("Unknown entry type: #{line}")
          add_entry(entries, new_entry)
          new_entry = {}
        end
      end

      # Flush the transaction still being collected when the input ends without
      # a trailing blank line; otherwise the last transaction would be lost.
      add_entry(entries, new_entry)
      entries
    end

    # Renders one transaction as text.
    #
    # row   - Hash providing :pretty_date, :description and :note.
    # line1 - two-element Array [account, amount] for the first posting.
    # line2 - two-element Array [account, amount] for the second posting.
    #
    # Returns the transaction as a String terminated by a blank line. Both
    # quoted strings on the header line are closed so the result can be read
    # back by #parse.
    def format_row(row, line1, line2)
      out = %Q{#{row[:pretty_date]} * "#{row[:description]}" "#{row[:note]}"\n}
      out += "\t#{line1.first}\t\t\t#{line1.last}\n"
      out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
      out
    end

    private

    # Removes one leading and one trailing quote character ("); nil passes
    # through unchanged.
    def trim_quote(str)
      return str unless str

      str.gsub(/^"([^"]*)"$/, '\1')
    end

    # Appends entry to entries when it has a date and at least two postings,
    # filling in a missing amount first. Anything else is silently ignored.
    def add_entry(entries, entry)
      return unless entry[:date] && entry[:accounts].length > 1

      entry[:accounts] = balance(entry[:accounts])
      entries << entry
    end

    # Returns a Date, or nil when date_str cannot be parsed or the year does
    # not have four digits.
    def try_parse_date(date_str)
      date = Date.parse(date_str)
      return nil unless date.year.between?(1000, 9999)

      date
    rescue ArgumentError
      nil
    end

    # Splits a posting line into { name:, amount: }. The account name is
    # separated from the rest by two or more whitespace characters or tabs;
    # amount is nil when the line carries no value.
    def parse_account_line(entry)
      # TODO: handle buying stocks
      # Assets:US:ETrade:VHT          19 VHT {132.32 USD, 2017-08-27}
      (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)

      return { name: account_name, amount: clean_money("") } if rest.nil? || rest.empty?

      value = if rest =~ /{/
                # "<qty> <commodity> {<unit price>, <date>}": value = qty * unit price
                (qty, unit_price) = rest.split(/[{,]/)
                (qty.to_f * unit_price.to_f).to_s
              else
                rest
              end

      { name: account_name, amount: clean_money(value || "") }
    end

    # Fills postings that have no amount with the negated sum of the others.
    # When more than one amount is missing a warning is printed to stdout and
    # every missing posting receives that same value.
    def balance(accounts)
      return accounts unless accounts.any? { |i| i[:amount].nil? }

      sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
      count = 0
      accounts.each do |account|
        next unless account[:amount].nil?

        count += 1
        account[:amount] = -sum
      end
      if count > 1
        puts "Warning: unparsable entry due to more than one missing money value."
        p accounts
        puts
      end

      accounts
    end

    # Converts a money string to a Float by dropping every character except
    # digits, '.' and '-'. Returns nil for nil or empty input.
    def clean_money(money)
      return nil if money.nil? || money.empty?

      money.gsub(/[^0-9.-]/, '').to_f
    end
  end
end
|
150
|
+
|
data/lib/reckon/csv_parser.rb
CHANGED
@@ -1,32 +1,28 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
2
4
|
|
3
5
|
module Reckon
|
6
|
+
# Parses CSV files
|
4
7
|
class CSVParser
|
5
|
-
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
|
8
|
+
attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
|
9
|
+
:description_column_indices, :money_column, :date_column
|
6
10
|
|
7
11
|
def initialize(options = {})
|
8
12
|
self.options = options
|
13
|
+
|
14
|
+
self.options[:csv_separator] = "\t" if options[:csv_separator] == '\t'
|
9
15
|
self.options[:currency] ||= '$'
|
16
|
+
|
17
|
+
# we convert to a string so we can do character encoding cleanup
|
10
18
|
@csv_data = parse(options[:string] || File.read(options[:file]), options[:file])
|
11
19
|
filter_csv
|
12
20
|
detect_columns
|
13
21
|
end
|
14
22
|
|
23
|
+
# transpose csv_data (array of rows) to an array of columns
|
15
24
|
def columns
|
16
|
-
@columns ||=
|
17
|
-
begin
|
18
|
-
last_row_length = nil
|
19
|
-
csv_data.inject([]) do |memo, row|
|
20
|
-
unless row.all? { |i| i.nil? || i.length == 0 }
|
21
|
-
row.each_with_index do |entry, index|
|
22
|
-
memo[index] ||= []
|
23
|
-
memo[index] << (entry || '').strip
|
24
|
-
end
|
25
|
-
last_row_length = row.length
|
26
|
-
end
|
27
|
-
memo
|
28
|
-
end
|
29
|
-
end
|
25
|
+
@columns ||= @csv_data[0].zip(*@csv_data[1..])
|
30
26
|
end
|
31
27
|
|
32
28
|
def date_for(index)
|
@@ -34,7 +30,7 @@ module Reckon
|
|
34
30
|
end
|
35
31
|
|
36
32
|
def pretty_date_for(index)
|
37
|
-
@date_column.pretty_for(
|
33
|
+
@date_column.pretty_for(index)
|
38
34
|
end
|
39
35
|
|
40
36
|
def money_for(index)
|
@@ -42,7 +38,7 @@ module Reckon
|
|
42
38
|
end
|
43
39
|
|
44
40
|
def pretty_money(amount, negate = false)
|
45
|
-
Money.new(
|
41
|
+
Money.new(amount, @options).pretty(negate)
|
46
42
|
end
|
47
43
|
|
48
44
|
def pretty_money_for(index, negate = false)
|
@@ -54,11 +50,11 @@ module Reckon
|
|
54
50
|
|
55
51
|
def description_for(index)
|
56
52
|
description_column_indices.map { |i| columns[i][index].to_s.strip }
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
.reject(&:empty?)
|
54
|
+
.join("; ")
|
55
|
+
.squeeze(" ")
|
56
|
+
.gsub(/(;\s+){2,}/, '')
|
57
|
+
.strip
|
62
58
|
end
|
63
59
|
|
64
60
|
def row(index)
|
@@ -84,9 +80,10 @@ module Reckon
|
|
84
80
|
money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
|
85
81
|
last = nil
|
86
82
|
column.reverse.each_with_index do |entry, row_from_bottom|
|
83
|
+
entry ||= "" # entries can be nil
|
87
84
|
row = csv_data[csv_data.length - 1 - row_from_bottom]
|
88
85
|
entry = entry.strip
|
89
|
-
money_score += Money::likelihood(
|
86
|
+
money_score += Money::likelihood(entry)
|
90
87
|
possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
|
91
88
|
possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
|
92
89
|
date_score += DateColumn.likelihood(entry)
|
@@ -97,8 +94,8 @@ module Reckon
|
|
97
94
|
row.each do |row_entry|
|
98
95
|
row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
|
99
96
|
if row_entry != 0 && last + row_entry == entry_as_num
|
100
|
-
|
101
|
-
|
97
|
+
money_score -= 10
|
98
|
+
break
|
102
99
|
end
|
103
100
|
end
|
104
101
|
end
|
@@ -110,7 +107,8 @@ module Reckon
|
|
110
107
|
found_likely_money_column = true
|
111
108
|
end
|
112
109
|
|
113
|
-
results << { :index => index, :money_score => money_score,
|
110
|
+
results << { :index => index, :money_score => money_score,
|
111
|
+
:date_score => date_score }
|
114
112
|
end
|
115
113
|
|
116
114
|
results.sort_by! { |n| -n[:money_score] }
|
@@ -129,14 +127,15 @@ module Reckon
|
|
129
127
|
# Some csv files negative/positive amounts are indicated in separate account
|
130
128
|
def detect_sign_column
|
131
129
|
return if columns[0].length <= 2 # This test needs requires more than two rows otherwise will lead to false positives
|
130
|
+
|
132
131
|
signs = []
|
133
132
|
if @money_column_indices[0] > 0
|
134
|
-
column = columns[
|
133
|
+
column = columns[@money_column_indices[0] - 1]
|
135
134
|
signs = column.uniq
|
136
135
|
end
|
137
136
|
if (signs.length != 2 &&
|
138
137
|
(@money_column_indices[0] + 1 < columns.length))
|
139
|
-
column = columns[
|
138
|
+
column = columns[@money_column_indices[0] + 1]
|
140
139
|
signs = column.uniq
|
141
140
|
end
|
142
141
|
if signs.length == 2
|
@@ -166,15 +165,19 @@ module Reckon
|
|
166
165
|
self.money_column_indices = [options[:money_column] - 1]
|
167
166
|
elsif options[:money_columns].length == 2
|
168
167
|
in_col, out_col = options[:money_columns]
|
169
|
-
self.money_column_indices = [in_col -1, out_col -1]
|
168
|
+
self.money_column_indices = [in_col - 1, out_col - 1]
|
170
169
|
else
|
171
170
|
puts "Unable to determine money columns, use --money-columns to specify the 1 or 2 column(s) reckon should use."
|
172
171
|
end
|
173
172
|
|
174
173
|
# If no money_column(s) argument is supplied, try to automatically infer money_column(s)
|
175
174
|
else
|
176
|
-
self.money_column_indices = results.select { |n|
|
175
|
+
self.money_column_indices = results.select { |n|
|
176
|
+
n[:is_money_column]
|
177
|
+
}.map { |n| n[:index] }
|
177
178
|
if self.money_column_indices.length == 1
|
179
|
+
# TODO: print the unfiltered column number, not the filtered
|
180
|
+
# ie if money column is 7, but we ignore columns 4 and 5, this prints "Using column 5 as the money column"
|
178
181
|
puts "Using column #{money_column_indices.first + 1} as the money column. Use --money-colum to specify a different one."
|
179
182
|
elsif self.money_column_indices.length == 2
|
180
183
|
puts "Using columns #{money_column_indices[0] + 1} and #{money_column_indices[1] + 1} as money column. Use --money-columns to specify different ones."
|
@@ -204,20 +207,53 @@ module Reckon
|
|
204
207
|
self.description_column_indices = results.map { |i| i[:index] }
|
205
208
|
end
|
206
209
|
|
207
|
-
def parse(data, filename=nil)
|
210
|
+
def parse(data, filename = nil)
|
208
211
|
# Use force_encoding to convert the string to utf-8 with as few invalid characters
|
209
212
|
# as possible.
|
210
213
|
data.force_encoding(try_encoding(data, filename))
|
211
214
|
data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
212
215
|
data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists
|
213
216
|
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
217
|
+
separator = options[:csv_separator] || guess_column_separator(data)
|
218
|
+
header_lines_to_skip = options[:contains_header] || 0
|
219
|
+
# -1 is skip 0 footer rows
|
220
|
+
footer_lines_to_skip = (options[:contains_footer] || 0) + 1
|
221
|
+
|
222
|
+
# convert to a stringio object to handle multi-line fields
|
223
|
+
parser_opts = {
|
224
|
+
col_sep: separator,
|
225
|
+
skip_blanks: true
|
226
|
+
}
|
227
|
+
begin
|
228
|
+
rows = CSV.parse(StringIO.new(data), **parser_opts)
|
229
|
+
rows[header_lines_to_skip..-footer_lines_to_skip]
|
230
|
+
rescue CSV::MalformedCSVError
|
231
|
+
# try removing N header lines before parsing
|
232
|
+
index = 0
|
233
|
+
count = 0
|
234
|
+
while count < header_lines_to_skip
|
235
|
+
index = data.index("\n", index) + 1 # skip over newline character
|
236
|
+
count += 1
|
237
|
+
end
|
238
|
+
rows = CSV.parse(StringIO.new(data[index..-1]), **parser_opts)
|
239
|
+
rows[0..-footer_lines_to_skip]
|
240
|
+
end
|
241
|
+
end
|
242
|
+
|
243
|
+
def guess_column_separator(data)
|
244
|
+
delimiters = [',', "\t", ';', ':', '|']
|
245
|
+
|
246
|
+
counts = [0] * delimiters.length
|
247
|
+
|
248
|
+
data.each_line do |line|
|
249
|
+
delimiters.each_with_index do |delim, i|
|
250
|
+
counts[i] += line.count(delim)
|
251
|
+
end
|
218
252
|
end
|
219
253
|
|
220
|
-
|
254
|
+
LOGGER.info("guessing #{delimiters[counts.index(counts.max)]} as csv separator")
|
255
|
+
|
256
|
+
delimiters[counts.index(counts.max)]
|
221
257
|
end
|
222
258
|
|
223
259
|
def try_encoding(data, filename = nil)
|