reckon 0.5.2 → 0.5.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +14 -6
- data/Gemfile.lock +1 -1
- data/README.md +6 -0
- data/lib/reckon.rb +1 -3
- data/lib/reckon/cosine_similarity.rb +91 -89
- data/lib/reckon/ledger_parser.rb +7 -1
- data/lib/reckon/logger.rb +4 -0
- data/lib/reckon/version.rb +1 -1
- data/spec/reckon/ledger_parser_spec.rb +24 -2
- metadata +42 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77139229b37c2dcb66ec4f8494fb9d40f036ed267cee0dad067483568b02b948
|
4
|
+
data.tar.gz: 363a124cf17848e855dede2351f06946e799ead31b2440e586d5c01ae45e63f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3473f4f80d659d8369151a4b22310159d8b4231df5a24efefd9fc426fa4d27744f2e62fb2c2d17826b0ce4c9a96ef13176ca9e602a47377b6271da49c1324cae
|
7
|
+
data.tar.gz: 323b5fe3aeafba7f04d93b91458d9982e75704c839bd763bf99371ba8f2b11f74d2b3a82a08bc57a56a1765f7a177955f441af03df63f844f0ae2804f842aacc
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.0.0-p648
|
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,20 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v0.5.3](https://github.com/cantino/reckon/tree/v0.5.3) (2020-05-01)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.5.2...v0.5.3)
|
6
|
+
|
7
|
+
**Closed issues:**
|
8
|
+
|
9
|
+
- Is reckon failing to handle comments when learning? [\#87](https://github.com/cantino/reckon/issues/87)
|
10
|
+
- \[FEATURE REQUEST\] Ask for currency of Account and output in output file in standard format of xxxx TLA for currency [\#84](https://github.com/cantino/reckon/issues/84)
|
11
|
+
|
3
12
|
## [v0.5.2](https://github.com/cantino/reckon/tree/v0.5.2) (2020-03-07)
|
4
13
|
|
5
14
|
[Full Changelog](https://github.com/cantino/reckon/compare/v0.5.1...v0.5.2)
|
6
15
|
|
7
16
|
**Closed issues:**
|
8
17
|
|
9
|
-
- \[BUG\] Reckon appears not to be parsing ISO standard date yyyy-mm-dd? [\#85](https://github.com/cantino/reckon/issues/85)
|
10
18
|
- \[Bug\]? Reckon fails to run on ruby 2.7.0 on Catalina [\#83](https://github.com/cantino/reckon/issues/83)
|
11
19
|
- --account-tokens issue [\#51](https://github.com/cantino/reckon/issues/51)
|
12
20
|
|
@@ -204,15 +212,15 @@
|
|
204
212
|
|
205
213
|
## [v0.3.3](https://github.com/cantino/reckon/tree/v0.3.3) (2013-01-13)
|
206
214
|
|
207
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.
|
215
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.3)
|
208
216
|
|
209
|
-
## [v0.3.
|
217
|
+
## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
|
210
218
|
|
211
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.
|
219
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.2)
|
212
220
|
|
213
|
-
## [v0.3.
|
221
|
+
## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
|
214
222
|
|
215
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.
|
223
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.1)
|
216
224
|
|
217
225
|
|
218
226
|
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -109,6 +109,12 @@ You can override them with `--default_outof_account` and `--default_into_account
|
|
109
109
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
110
110
|
* Send me a pull request. Bonus points for topic branches.
|
111
111
|
|
112
|
+
## Making a release
|
113
|
+
* Update lib/reckon/version.rb
|
114
|
+
* Run `github_changelog_generator --future-release v$(egrep '"[^"]+"' -o lib/reckon/version.rb |sed -e 's/"//g') --user cantino --project reckon -t $(cat ~/.github_token)`
|
115
|
+
* Commit
|
116
|
+
* Tag the commit with the same version as in version.rb, in the form vX.XX.XX (e.g. v0.5.2)
|
117
|
+
|
112
118
|
## Copyright
|
113
119
|
|
114
120
|
Copyright (c) 2013 Andrew Cantino. See LICENSE for details.
|
data/lib/reckon.rb
CHANGED
@@ -10,10 +10,8 @@ require 'terminal-table'
|
|
10
10
|
require 'time'
|
11
11
|
require 'logger'
|
12
12
|
|
13
|
-
LOGGER = Logger.new(STDERR)
|
14
|
-
LOGGER.level = Logger::WARN
|
15
|
-
|
16
13
|
require_relative 'reckon/version'
|
14
|
+
require_relative 'reckon/logger'
|
17
15
|
require_relative 'reckon/cosine_similarity'
|
18
16
|
require_relative 'reckon/date_column'
|
19
17
|
require_relative 'reckon/money'
|
@@ -3,118 +3,120 @@ require 'set'
|
|
3
3
|
|
4
4
|
# Implementation of cosine similarity using TF-IDF for vectorization.
|
5
5
|
# Used to suggest which account a transaction should be assigned to
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def add_document(account, doc)
|
14
|
-
tokenize(doc).each do |n|
|
15
|
-
(token, count) = n
|
16
|
-
|
17
|
-
@tokens[token] ||= {}
|
18
|
-
@tokens[token][account] ||= 0
|
19
|
-
@tokens[token][account] += count
|
20
|
-
@accounts[account] += count
|
6
|
+
module Reckon
|
7
|
+
class CosineSimilarity
|
8
|
+
def initialize(options)
|
9
|
+
@options = options
|
10
|
+
@tokens = {}
|
11
|
+
@accounts = Hash.new(0)
|
21
12
|
end
|
22
|
-
end
|
23
|
-
|
24
|
-
# find most similar documents to query
|
25
|
-
def find_similar(query)
|
26
|
-
(query_scores, corpus_scores) = td_idf_scores_for(query)
|
27
13
|
|
28
|
-
|
14
|
+
def add_document(account, doc)
|
15
|
+
tokenize(doc).each do |n|
|
16
|
+
(token, count) = n
|
29
17
|
|
30
|
-
|
31
|
-
|
32
|
-
|
18
|
+
@tokens[token] ||= {}
|
19
|
+
@tokens[token][account] ||= 0
|
20
|
+
@tokens[token][account] += count
|
21
|
+
@accounts[account] += count
|
22
|
+
end
|
23
|
+
end
|
33
24
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
# see https://en.wikipedia.org/wiki/Cosine_similarity
|
38
|
-
# cos(theta) = (A . B) / (||A|| ||B||)
|
39
|
-
# where A . B is the "dot product" and ||A|| is the magnitude of A
|
40
|
-
# ruby has the 'matrix' library we can use to do these calculations.
|
41
|
-
{
|
42
|
-
similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
|
43
|
-
account: account,
|
44
|
-
}
|
45
|
-
end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
|
25
|
+
# find most similar documents to query
|
26
|
+
def find_similar(query)
|
27
|
+
(query_scores, corpus_scores) = td_idf_scores_for(query)
|
46
28
|
|
47
|
-
|
29
|
+
query_vector = Vector.elements(query_scores, false)
|
48
30
|
|
49
|
-
|
50
|
-
|
31
|
+
# For each doc, calculate the similarity to the query
|
32
|
+
suggestions = corpus_scores.map do |account, scores|
|
33
|
+
acct_vector = Vector.elements(scores, false)
|
51
34
|
|
52
|
-
|
35
|
+
acct_query_dp = acct_vector.inner_product(query_vector)
|
36
|
+
# similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
|
37
|
+
# exactly opposite
|
38
|
+
# see https://en.wikipedia.org/wiki/Cosine_similarity
|
39
|
+
# cos(theta) = (A . B) / (||A|| ||B||)
|
40
|
+
# where A . B is the "dot product" and ||A|| is the magnitude of A
|
41
|
+
# ruby has the 'matrix' library we can use to do these calculations.
|
42
|
+
{
|
43
|
+
similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
|
44
|
+
account: account,
|
45
|
+
}
|
46
|
+
end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
|
53
47
|
|
54
|
-
|
55
|
-
query_tokens = tokenize(query)
|
56
|
-
corpus = Set.new
|
57
|
-
corpus_scores = {}
|
58
|
-
query_scores = []
|
59
|
-
num_docs = @accounts.length
|
48
|
+
LOGGER.info "most similar accounts: #{suggestions}"
|
60
49
|
|
61
|
-
|
62
|
-
(token, _count) = n
|
63
|
-
next unless @tokens[token]
|
64
|
-
corpus = corpus.union(Set.new(@tokens[token].keys))
|
50
|
+
return suggestions
|
65
51
|
end
|
66
52
|
|
67
|
-
|
68
|
-
(token, count) = n
|
53
|
+
private
|
69
54
|
|
70
|
-
|
71
|
-
|
55
|
+
def td_idf_scores_for(query)
|
56
|
+
query_tokens = tokenize(query)
|
57
|
+
corpus = Set.new
|
58
|
+
corpus_scores = {}
|
59
|
+
query_scores = []
|
60
|
+
num_docs = @accounts.length
|
61
|
+
|
62
|
+
query_tokens.each do |n|
|
63
|
+
(token, _count) = n
|
64
|
+
next unless @tokens[token]
|
65
|
+
corpus = corpus.union(Set.new(@tokens[token].keys))
|
66
|
+
end
|
72
67
|
|
73
|
-
|
74
|
-
|
75
|
-
count,
|
76
|
-
query_tokens.length,
|
77
|
-
@tokens[token].length,
|
78
|
-
num_docs
|
79
|
-
)
|
68
|
+
query_tokens.each do |n|
|
69
|
+
(token, count) = n
|
80
70
|
|
81
|
-
|
82
|
-
|
83
|
-
corpus_scores[account] ||= []
|
71
|
+
# if no other docs have token, ignore it
|
72
|
+
next unless @tokens[token]
|
84
73
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
74
|
+
## First, calculate scores for our query as we're building scores for the corpus
|
75
|
+
query_scores << calc_tf_idf(
|
76
|
+
count,
|
77
|
+
query_tokens.length,
|
78
|
+
@tokens[token].length,
|
89
79
|
num_docs
|
90
80
|
)
|
81
|
+
|
82
|
+
## Next, calculate for the corpus, where our "account" is a document
|
83
|
+
corpus.each do |account|
|
84
|
+
corpus_scores[account] ||= []
|
85
|
+
|
86
|
+
corpus_scores[account] << calc_tf_idf(
|
87
|
+
(@tokens[token][account] || 0),
|
88
|
+
@accounts[account].to_f,
|
89
|
+
@tokens[token].length.to_f,
|
90
|
+
num_docs
|
91
|
+
)
|
92
|
+
end
|
91
93
|
end
|
94
|
+
[query_scores, corpus_scores]
|
92
95
|
end
|
93
|
-
[query_scores, corpus_scores]
|
94
|
-
end
|
95
96
|
|
96
|
-
|
97
|
+
def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
|
97
98
|
|
98
|
-
|
99
|
-
|
99
|
+
# tf(t,d) = count of t in d / number of words in d
|
100
|
+
tf = token_count / num_words_in_doc.to_f
|
100
101
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
102
|
+
# smooth idf weight
|
103
|
+
# see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
|
104
|
+
# df(t) = num of documents with term t in them
|
105
|
+
# idf(t) = log(N/(1 + df )) + 1
|
106
|
+
idf = Math.log(num_docs.to_f / (1 + df)) + 1
|
106
107
|
|
107
|
-
|
108
|
-
|
108
|
+
tf * idf
|
109
|
+
end
|
109
110
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
end
|
111
|
+
def tokenize(str)
|
112
|
+
mk_tokens(str).inject(Hash.new(0)) do |memo, n|
|
113
|
+
memo[n] += 1
|
114
|
+
memo
|
115
|
+
end.to_a
|
116
|
+
end
|
117
117
|
|
118
|
-
def mk_tokens(str)
|
119
|
-
|
118
|
+
def mk_tokens(str)
|
119
|
+
str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
|
120
|
+
end
|
121
|
+
end
|
120
122
|
end
|
data/lib/reckon/ledger_parser.rb
CHANGED
@@ -121,8 +121,13 @@ module Reckon
|
|
121
121
|
def parse(ledger)
|
122
122
|
@entries = []
|
123
123
|
new_entry = {}
|
124
|
+
in_comment = false
|
124
125
|
ledger.strip.split("\n").each do |entry|
|
125
|
-
|
126
|
+
# strip comment lines
|
127
|
+
in_comment = true if entry == 'comment'
|
128
|
+
in_comment = false if entry == 'end comment'
|
129
|
+
next if in_comment
|
130
|
+
next if entry =~ /^\s*$/ || entry =~ /^[;#%|*]/
|
126
131
|
|
127
132
|
# (date, type, code, description), type and code are optional
|
128
133
|
if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
|
@@ -135,6 +140,7 @@ module Reckon
|
|
135
140
|
accounts: []
|
136
141
|
}
|
137
142
|
elsif new_entry[:date] && entry =~ /^\s+/
|
143
|
+
LOGGER.info("Adding new account #{entry}")
|
138
144
|
new_entry[:accounts] << parse_account_line(entry)
|
139
145
|
else
|
140
146
|
LOGGER.info("Unknown entry type: #{entry}")
|
data/lib/reckon/version.rb
CHANGED
@@ -28,6 +28,7 @@ describe Reckon::LedgerParser do
|
|
28
28
|
sized(15){string}.tr(%q{'`:*\\},'').gsub(/\s+/, ' ').gsub(/^[!;<\[( ]+/, '')
|
29
29
|
end
|
30
30
|
currency = choose(*currencies) # to be consistent within the transaction
|
31
|
+
single_line_comments = ";#|%*".split('').map { |n| "#{n} #{call(description)}" }
|
31
32
|
comments = ['', '; ', "\t;#{call(description)}", " ; #{call(description)}"]
|
32
33
|
date = Time.at(range(0, 1_581_389_644)).strftime(choose(*formats))
|
33
34
|
codes = [' ', " (#{string(:alnum).tr('()', '')}) "]
|
@@ -48,6 +49,7 @@ describe Reckon::LedgerParser do
|
|
48
49
|
ledger += "#{call(account_line)}\n"
|
49
50
|
end
|
50
51
|
ledger += "#{call(account)}\n"
|
52
|
+
ledger += choose(*single_line_comments) + "\n"
|
51
53
|
ledger
|
52
54
|
end
|
53
55
|
end.check(1000) do |s|
|
@@ -57,14 +59,34 @@ describe Reckon::LedgerParser do
|
|
57
59
|
ledger_csv = `echo #{safe_s} | ledger csv --date-format '%Y-%m-%d' -f - `
|
58
60
|
ledger_parser_csv = Reckon::LedgerParser.new(s, date_format: '%Y/%m/%d').to_csv.join("\n")
|
59
61
|
|
60
|
-
expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map
|
61
|
-
actual = CSV.parse(ledger_parser_csv, headers: headers).map
|
62
|
+
expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map(&filter_format)
|
63
|
+
actual = CSV.parse(ledger_parser_csv, headers: headers).map(&filter_format)
|
62
64
|
expected.length.times do |i|
|
63
65
|
expect(actual[i]).to eq(expected[i])
|
64
66
|
end
|
65
67
|
end
|
66
68
|
end
|
67
69
|
|
70
|
+
it 'should filter block comments' do
|
71
|
+
ledger = <<HERE
|
72
|
+
1970/11/01 Dinner should show up
|
73
|
+
Assets:Checking -123.00
|
74
|
+
Expenses:Restaurants
|
75
|
+
|
76
|
+
comment
|
77
|
+
|
78
|
+
1970/11/01 Lunch should NOT show up
|
79
|
+
Assets:Checking -12.00
|
80
|
+
Expenses:Restaurants
|
81
|
+
|
82
|
+
end comment
|
83
|
+
HERE
|
84
|
+
l = Reckon::LedgerParser.new(ledger)
|
85
|
+
expect(l.entries.length).to eq(1)
|
86
|
+
expect(l.entries.first[:desc]).to eq('Dinner should show up')
|
87
|
+
|
88
|
+
end
|
89
|
+
|
68
90
|
it "should ignore non-standard entries" do
|
69
91
|
@ledger.entries.length.should == 7
|
70
92
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reckon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Cantino
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-
|
13
|
+
date: 2020-05-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
@@ -137,6 +137,7 @@ files:
|
|
137
137
|
- lib/reckon/csv_parser.rb
|
138
138
|
- lib/reckon/date_column.rb
|
139
139
|
- lib/reckon/ledger_parser.rb
|
140
|
+
- lib/reckon/logger.rb
|
140
141
|
- lib/reckon/money.rb
|
141
142
|
- lib/reckon/version.rb
|
142
143
|
- reckon.gemspec
|
@@ -196,9 +197,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
197
|
- !ruby/object:Gem::Version
|
197
198
|
version: '0'
|
198
199
|
requirements: []
|
199
|
-
rubygems_version: 3.0.
|
200
|
+
rubygems_version: 3.0.3
|
200
201
|
signing_key:
|
201
202
|
specification_version: 4
|
202
203
|
summary: Utility for interactively converting and labeling CSV files for the Ledger
|
203
204
|
accounting tool.
|
204
|
-
test_files:
|
205
|
+
test_files:
|
206
|
+
- spec/data_fixtures/51-sample.csv
|
207
|
+
- spec/data_fixtures/51-tokens.yml
|
208
|
+
- spec/data_fixtures/73-sample.csv
|
209
|
+
- spec/data_fixtures/73-tokens.yml
|
210
|
+
- spec/data_fixtures/73-transactions.ledger
|
211
|
+
- spec/data_fixtures/85-date-example.csv
|
212
|
+
- spec/data_fixtures/austrian_example.csv
|
213
|
+
- spec/data_fixtures/bom_utf8_file.csv
|
214
|
+
- spec/data_fixtures/broker_canada_example.csv
|
215
|
+
- spec/data_fixtures/chase.csv
|
216
|
+
- spec/data_fixtures/danish_kroner_nordea_example.csv
|
217
|
+
- spec/data_fixtures/english_date_example.csv
|
218
|
+
- spec/data_fixtures/extratofake.csv
|
219
|
+
- spec/data_fixtures/french_example.csv
|
220
|
+
- spec/data_fixtures/german_date_example.csv
|
221
|
+
- spec/data_fixtures/harder_date_example.csv
|
222
|
+
- spec/data_fixtures/ing.csv
|
223
|
+
- spec/data_fixtures/intuit_mint_example.csv
|
224
|
+
- spec/data_fixtures/invalid_header_example.csv
|
225
|
+
- spec/data_fixtures/inversed_credit_card.csv
|
226
|
+
- spec/data_fixtures/nationwide.csv
|
227
|
+
- spec/data_fixtures/simple.csv
|
228
|
+
- spec/data_fixtures/some_other.csv
|
229
|
+
- spec/data_fixtures/spanish_date_example.csv
|
230
|
+
- spec/data_fixtures/suntrust.csv
|
231
|
+
- spec/data_fixtures/test_money_column.csv
|
232
|
+
- spec/data_fixtures/tokens.yaml
|
233
|
+
- spec/data_fixtures/two_money_columns.csv
|
234
|
+
- spec/data_fixtures/yyyymmdd_date_example.csv
|
235
|
+
- spec/reckon/app_spec.rb
|
236
|
+
- spec/reckon/csv_parser_spec.rb
|
237
|
+
- spec/reckon/date_column_spec.rb
|
238
|
+
- spec/reckon/ledger_parser_spec.rb
|
239
|
+
- spec/reckon/money_column_spec.rb
|
240
|
+
- spec/reckon/money_spec.rb
|
241
|
+
- spec/spec.opts
|
242
|
+
- spec/spec_helper.rb
|