reckon 0.5.2 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/CHANGELOG.md +14 -6
- data/Gemfile.lock +1 -1
- data/README.md +6 -0
- data/lib/reckon.rb +1 -3
- data/lib/reckon/cosine_similarity.rb +91 -89
- data/lib/reckon/ledger_parser.rb +7 -1
- data/lib/reckon/logger.rb +4 -0
- data/lib/reckon/version.rb +1 -1
- data/spec/reckon/ledger_parser_spec.rb +24 -2
- metadata +42 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77139229b37c2dcb66ec4f8494fb9d40f036ed267cee0dad067483568b02b948
|
4
|
+
data.tar.gz: 363a124cf17848e855dede2351f06946e799ead31b2440e586d5c01ae45e63f1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3473f4f80d659d8369151a4b22310159d8b4231df5a24efefd9fc426fa4d27744f2e62fb2c2d17826b0ce4c9a96ef13176ca9e602a47377b6271da49c1324cae
|
7
|
+
data.tar.gz: 323b5fe3aeafba7f04d93b91458d9982e75704c839bd763bf99371ba8f2b11f74d2b3a82a08bc57a56a1765f7a177955f441af03df63f844f0ae2804f842aacc
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.0.0-p648
|
data/CHANGELOG.md
CHANGED
@@ -1,12 +1,20 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
+
## [v0.5.3](https://github.com/cantino/reckon/tree/v0.5.3) (2020-05-01)
|
4
|
+
|
5
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.5.2...v0.5.3)
|
6
|
+
|
7
|
+
**Closed issues:**
|
8
|
+
|
9
|
+
- Is reckon failing to handle comments when learning? [\#87](https://github.com/cantino/reckon/issues/87)
|
10
|
+
- \[FEATURE REQUEST\] Ask for currency of Account and output in output file in standard format of xxxx TLA for currency [\#84](https://github.com/cantino/reckon/issues/84)
|
11
|
+
|
3
12
|
## [v0.5.2](https://github.com/cantino/reckon/tree/v0.5.2) (2020-03-07)
|
4
13
|
|
5
14
|
[Full Changelog](https://github.com/cantino/reckon/compare/v0.5.1...v0.5.2)
|
6
15
|
|
7
16
|
**Closed issues:**
|
8
17
|
|
9
|
-
- \[BUG\] Reckon appears not to be parsing ISO standard date yyyy-mm-dd? [\#85](https://github.com/cantino/reckon/issues/85)
|
10
18
|
- \[Bug\]? Reckon fails to run on ruby 2.7.0 on Catalina [\#83](https://github.com/cantino/reckon/issues/83)
|
11
19
|
- --account-tokens issue [\#51](https://github.com/cantino/reckon/issues/51)
|
12
20
|
|
@@ -204,15 +212,15 @@
|
|
204
212
|
|
205
213
|
## [v0.3.3](https://github.com/cantino/reckon/tree/v0.3.3) (2013-01-13)
|
206
214
|
|
207
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.
|
215
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.3)
|
208
216
|
|
209
|
-
## [v0.3.
|
217
|
+
## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
|
210
218
|
|
211
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.
|
219
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.2)
|
212
220
|
|
213
|
-
## [v0.3.
|
221
|
+
## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
|
214
222
|
|
215
|
-
[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.
|
223
|
+
[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.1)
|
216
224
|
|
217
225
|
|
218
226
|
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -109,6 +109,12 @@ You can override them with `--default_outof_account` and `--default_into_account
|
|
109
109
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
110
110
|
* Send me a pull request. Bonus points for topic branches.
|
111
111
|
|
112
|
+
## Making a release
|
113
|
+
* Update lib/reckon/version.rb
|
114
|
+
* Run `github_changelog_generator --future-release v$(egrep '"[^"]+"' -o lib/reckon/version.rb |sed -e 's/"//g') --user cantino --project reckon -t $(cat ~/.github_token)`
|
115
|
+
* Commit
|
116
|
+
* Tag the commit same as in version.rb vX.XX.XX (ex v0.5.2)
|
117
|
+
|
112
118
|
## Copyright
|
113
119
|
|
114
120
|
Copyright (c) 2013 Andrew Cantino. See LICENSE for details.
|
data/lib/reckon.rb
CHANGED
@@ -10,10 +10,8 @@ require 'terminal-table'
|
|
10
10
|
require 'time'
|
11
11
|
require 'logger'
|
12
12
|
|
13
|
-
LOGGER = Logger.new(STDERR)
|
14
|
-
LOGGER.level = Logger::WARN
|
15
|
-
|
16
13
|
require_relative 'reckon/version'
|
14
|
+
require_relative 'reckon/logger'
|
17
15
|
require_relative 'reckon/cosine_similarity'
|
18
16
|
require_relative 'reckon/date_column'
|
19
17
|
require_relative 'reckon/money'
|
@@ -3,118 +3,120 @@ require 'set'
|
|
3
3
|
|
4
4
|
# Implementation of consine similarity using TF-IDF for vectorization.
|
5
5
|
# Used to suggest which account a transaction should be assigned to
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
def add_document(account, doc)
|
14
|
-
tokenize(doc).each do |n|
|
15
|
-
(token, count) = n
|
16
|
-
|
17
|
-
@tokens[token] ||= {}
|
18
|
-
@tokens[token][account] ||= 0
|
19
|
-
@tokens[token][account] += count
|
20
|
-
@accounts[account] += count
|
6
|
+
module Reckon
|
7
|
+
class CosineSimilarity
|
8
|
+
def initialize(options)
|
9
|
+
@options = options
|
10
|
+
@tokens = {}
|
11
|
+
@accounts = Hash.new(0)
|
21
12
|
end
|
22
|
-
end
|
23
|
-
|
24
|
-
# find most similar documents to query
|
25
|
-
def find_similar(query)
|
26
|
-
(query_scores, corpus_scores) = td_idf_scores_for(query)
|
27
13
|
|
28
|
-
|
14
|
+
def add_document(account, doc)
|
15
|
+
tokenize(doc).each do |n|
|
16
|
+
(token, count) = n
|
29
17
|
|
30
|
-
|
31
|
-
|
32
|
-
|
18
|
+
@tokens[token] ||= {}
|
19
|
+
@tokens[token][account] ||= 0
|
20
|
+
@tokens[token][account] += count
|
21
|
+
@accounts[account] += count
|
22
|
+
end
|
23
|
+
end
|
33
24
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
# see https://en.wikipedia.org/wiki/Cosine_similarity
|
38
|
-
# cos(theta) = (A . B) / (||A|| ||B||)
|
39
|
-
# where A . B is the "dot product" and ||A|| is the magnitude of A
|
40
|
-
# ruby has the 'matrix' library we can use to do these calculations.
|
41
|
-
{
|
42
|
-
similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
|
43
|
-
account: account,
|
44
|
-
}
|
45
|
-
end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
|
25
|
+
# find most similar documents to query
|
26
|
+
def find_similar(query)
|
27
|
+
(query_scores, corpus_scores) = td_idf_scores_for(query)
|
46
28
|
|
47
|
-
|
29
|
+
query_vector = Vector.elements(query_scores, false)
|
48
30
|
|
49
|
-
|
50
|
-
|
31
|
+
# For each doc, calculate the similarity to the query
|
32
|
+
suggestions = corpus_scores.map do |account, scores|
|
33
|
+
acct_vector = Vector.elements(scores, false)
|
51
34
|
|
52
|
-
|
35
|
+
acct_query_dp = acct_vector.inner_product(query_vector)
|
36
|
+
# similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
|
37
|
+
# exactly opposite
|
38
|
+
# see https://en.wikipedia.org/wiki/Cosine_similarity
|
39
|
+
# cos(theta) = (A . B) / (||A|| ||B||)
|
40
|
+
# where A . B is the "dot product" and ||A|| is the magnitude of A
|
41
|
+
# ruby has the 'matrix' library we can use to do these calculations.
|
42
|
+
{
|
43
|
+
similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
|
44
|
+
account: account,
|
45
|
+
}
|
46
|
+
end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
|
53
47
|
|
54
|
-
|
55
|
-
query_tokens = tokenize(query)
|
56
|
-
corpus = Set.new
|
57
|
-
corpus_scores = {}
|
58
|
-
query_scores = []
|
59
|
-
num_docs = @accounts.length
|
48
|
+
LOGGER.info "most similar accounts: #{suggestions}"
|
60
49
|
|
61
|
-
|
62
|
-
(token, _count) = n
|
63
|
-
next unless @tokens[token]
|
64
|
-
corpus = corpus.union(Set.new(@tokens[token].keys))
|
50
|
+
return suggestions
|
65
51
|
end
|
66
52
|
|
67
|
-
|
68
|
-
(token, count) = n
|
53
|
+
private
|
69
54
|
|
70
|
-
|
71
|
-
|
55
|
+
def td_idf_scores_for(query)
|
56
|
+
query_tokens = tokenize(query)
|
57
|
+
corpus = Set.new
|
58
|
+
corpus_scores = {}
|
59
|
+
query_scores = []
|
60
|
+
num_docs = @accounts.length
|
61
|
+
|
62
|
+
query_tokens.each do |n|
|
63
|
+
(token, _count) = n
|
64
|
+
next unless @tokens[token]
|
65
|
+
corpus = corpus.union(Set.new(@tokens[token].keys))
|
66
|
+
end
|
72
67
|
|
73
|
-
|
74
|
-
|
75
|
-
count,
|
76
|
-
query_tokens.length,
|
77
|
-
@tokens[token].length,
|
78
|
-
num_docs
|
79
|
-
)
|
68
|
+
query_tokens.each do |n|
|
69
|
+
(token, count) = n
|
80
70
|
|
81
|
-
|
82
|
-
|
83
|
-
corpus_scores[account] ||= []
|
71
|
+
# if no other docs have token, ignore it
|
72
|
+
next unless @tokens[token]
|
84
73
|
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
74
|
+
## First, calculate scores for our query as we're building scores for the corpus
|
75
|
+
query_scores << calc_tf_idf(
|
76
|
+
count,
|
77
|
+
query_tokens.length,
|
78
|
+
@tokens[token].length,
|
89
79
|
num_docs
|
90
80
|
)
|
81
|
+
|
82
|
+
## Next, calculate for the corpus, where our "account" is a document
|
83
|
+
corpus.each do |account|
|
84
|
+
corpus_scores[account] ||= []
|
85
|
+
|
86
|
+
corpus_scores[account] << calc_tf_idf(
|
87
|
+
(@tokens[token][account] || 0),
|
88
|
+
@accounts[account].to_f,
|
89
|
+
@tokens[token].length.to_f,
|
90
|
+
num_docs
|
91
|
+
)
|
92
|
+
end
|
91
93
|
end
|
94
|
+
[query_scores, corpus_scores]
|
92
95
|
end
|
93
|
-
[query_scores, corpus_scores]
|
94
|
-
end
|
95
96
|
|
96
|
-
|
97
|
+
def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
|
97
98
|
|
98
|
-
|
99
|
-
|
99
|
+
# tf(t,d) = count of t in d / number of words in d
|
100
|
+
tf = token_count / num_words_in_doc.to_f
|
100
101
|
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
102
|
+
# smooth idf weight
|
103
|
+
# see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
|
104
|
+
# df(t) = num of documents with term t in them
|
105
|
+
# idf(t) = log(N/(1 + df )) + 1
|
106
|
+
idf = Math.log(num_docs.to_f / (1 + df)) + 1
|
106
107
|
|
107
|
-
|
108
|
-
|
108
|
+
tf * idf
|
109
|
+
end
|
109
110
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
end
|
111
|
+
def tokenize(str)
|
112
|
+
mk_tokens(str).inject(Hash.new(0)) do |memo, n|
|
113
|
+
memo[n] += 1
|
114
|
+
memo
|
115
|
+
end.to_a
|
116
|
+
end
|
117
117
|
|
118
|
-
def mk_tokens(str)
|
119
|
-
|
118
|
+
def mk_tokens(str)
|
119
|
+
str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
|
120
|
+
end
|
121
|
+
end
|
120
122
|
end
|
data/lib/reckon/ledger_parser.rb
CHANGED
@@ -121,8 +121,13 @@ module Reckon
|
|
121
121
|
def parse(ledger)
|
122
122
|
@entries = []
|
123
123
|
new_entry = {}
|
124
|
+
in_comment = false
|
124
125
|
ledger.strip.split("\n").each do |entry|
|
125
|
-
|
126
|
+
# strip comment lines
|
127
|
+
in_comment = true if entry == 'comment'
|
128
|
+
in_comment = false if entry == 'end comment'
|
129
|
+
next if in_comment
|
130
|
+
next if entry =~ /^\s*$/ || entry =~ /^[;#%|*]/
|
126
131
|
|
127
132
|
# (date, type, code, description), type and code are optional
|
128
133
|
if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
|
@@ -135,6 +140,7 @@ module Reckon
|
|
135
140
|
accounts: []
|
136
141
|
}
|
137
142
|
elsif new_entry[:date] && entry =~ /^\s+/
|
143
|
+
LOGGER.info("Adding new account #{entry}")
|
138
144
|
new_entry[:accounts] << parse_account_line(entry)
|
139
145
|
else
|
140
146
|
LOGGER.info("Unknown entry type: #{entry}")
|
data/lib/reckon/version.rb
CHANGED
@@ -28,6 +28,7 @@ describe Reckon::LedgerParser do
|
|
28
28
|
sized(15){string}.tr(%q{'`:*\\},'').gsub(/\s+/, ' ').gsub(/^[!;<\[( ]+/, '')
|
29
29
|
end
|
30
30
|
currency = choose(*currencies) # to be consistent within the transaction
|
31
|
+
single_line_comments = ";#|%*".split('').map { |n| "#{n} #{call(description)}" }
|
31
32
|
comments = ['', '; ', "\t;#{call(description)}", " ; #{call(description)}"]
|
32
33
|
date = Time.at(range(0, 1_581_389_644)).strftime(choose(*formats))
|
33
34
|
codes = [' ', " (#{string(:alnum).tr('()', '')}) "]
|
@@ -48,6 +49,7 @@ describe Reckon::LedgerParser do
|
|
48
49
|
ledger += "#{call(account_line)}\n"
|
49
50
|
end
|
50
51
|
ledger += "#{call(account)}\n"
|
52
|
+
ledger += choose(*single_line_comments) + "\n"
|
51
53
|
ledger
|
52
54
|
end
|
53
55
|
end.check(1000) do |s|
|
@@ -57,14 +59,34 @@ describe Reckon::LedgerParser do
|
|
57
59
|
ledger_csv = `echo #{safe_s} | ledger csv --date-format '%Y-%m-%d' -f - `
|
58
60
|
ledger_parser_csv = Reckon::LedgerParser.new(s, date_format: '%Y/%m/%d').to_csv.join("\n")
|
59
61
|
|
60
|
-
expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map
|
61
|
-
actual = CSV.parse(ledger_parser_csv, headers: headers).map
|
62
|
+
expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map(&filter_format)
|
63
|
+
actual = CSV.parse(ledger_parser_csv, headers: headers).map(&filter_format)
|
62
64
|
expected.length.times do |i|
|
63
65
|
expect(actual[i]).to eq(expected[i])
|
64
66
|
end
|
65
67
|
end
|
66
68
|
end
|
67
69
|
|
70
|
+
it 'should filter block comments' do
|
71
|
+
ledger = <<HERE
|
72
|
+
1970/11/01 Dinner should show up
|
73
|
+
Assets:Checking -123.00
|
74
|
+
Expenses:Restaurants
|
75
|
+
|
76
|
+
comment
|
77
|
+
|
78
|
+
1970/11/01 Lunch should NOT show up
|
79
|
+
Assets:Checking -12.00
|
80
|
+
Expenses:Restaurants
|
81
|
+
|
82
|
+
end comment
|
83
|
+
HERE
|
84
|
+
l = Reckon::LedgerParser.new(ledger)
|
85
|
+
expect(l.entries.length).to eq(1)
|
86
|
+
expect(l.entries.first[:desc]).to eq('Dinner should show up')
|
87
|
+
|
88
|
+
end
|
89
|
+
|
68
90
|
it "should ignore non-standard entries" do
|
69
91
|
@ledger.entries.length.should == 7
|
70
92
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: reckon
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Cantino
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2020-
|
13
|
+
date: 2020-05-02 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
@@ -137,6 +137,7 @@ files:
|
|
137
137
|
- lib/reckon/csv_parser.rb
|
138
138
|
- lib/reckon/date_column.rb
|
139
139
|
- lib/reckon/ledger_parser.rb
|
140
|
+
- lib/reckon/logger.rb
|
140
141
|
- lib/reckon/money.rb
|
141
142
|
- lib/reckon/version.rb
|
142
143
|
- reckon.gemspec
|
@@ -196,9 +197,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
196
197
|
- !ruby/object:Gem::Version
|
197
198
|
version: '0'
|
198
199
|
requirements: []
|
199
|
-
rubygems_version: 3.0.
|
200
|
+
rubygems_version: 3.0.3
|
200
201
|
signing_key:
|
201
202
|
specification_version: 4
|
202
203
|
summary: Utility for interactively converting and labeling CSV files for the Ledger
|
203
204
|
accounting tool.
|
204
|
-
test_files:
|
205
|
+
test_files:
|
206
|
+
- spec/data_fixtures/51-sample.csv
|
207
|
+
- spec/data_fixtures/51-tokens.yml
|
208
|
+
- spec/data_fixtures/73-sample.csv
|
209
|
+
- spec/data_fixtures/73-tokens.yml
|
210
|
+
- spec/data_fixtures/73-transactions.ledger
|
211
|
+
- spec/data_fixtures/85-date-example.csv
|
212
|
+
- spec/data_fixtures/austrian_example.csv
|
213
|
+
- spec/data_fixtures/bom_utf8_file.csv
|
214
|
+
- spec/data_fixtures/broker_canada_example.csv
|
215
|
+
- spec/data_fixtures/chase.csv
|
216
|
+
- spec/data_fixtures/danish_kroner_nordea_example.csv
|
217
|
+
- spec/data_fixtures/english_date_example.csv
|
218
|
+
- spec/data_fixtures/extratofake.csv
|
219
|
+
- spec/data_fixtures/french_example.csv
|
220
|
+
- spec/data_fixtures/german_date_example.csv
|
221
|
+
- spec/data_fixtures/harder_date_example.csv
|
222
|
+
- spec/data_fixtures/ing.csv
|
223
|
+
- spec/data_fixtures/intuit_mint_example.csv
|
224
|
+
- spec/data_fixtures/invalid_header_example.csv
|
225
|
+
- spec/data_fixtures/inversed_credit_card.csv
|
226
|
+
- spec/data_fixtures/nationwide.csv
|
227
|
+
- spec/data_fixtures/simple.csv
|
228
|
+
- spec/data_fixtures/some_other.csv
|
229
|
+
- spec/data_fixtures/spanish_date_example.csv
|
230
|
+
- spec/data_fixtures/suntrust.csv
|
231
|
+
- spec/data_fixtures/test_money_column.csv
|
232
|
+
- spec/data_fixtures/tokens.yaml
|
233
|
+
- spec/data_fixtures/two_money_columns.csv
|
234
|
+
- spec/data_fixtures/yyyymmdd_date_example.csv
|
235
|
+
- spec/reckon/app_spec.rb
|
236
|
+
- spec/reckon/csv_parser_spec.rb
|
237
|
+
- spec/reckon/date_column_spec.rb
|
238
|
+
- spec/reckon/ledger_parser_spec.rb
|
239
|
+
- spec/reckon/money_column_spec.rb
|
240
|
+
- spec/reckon/money_spec.rb
|
241
|
+
- spec/spec.opts
|
242
|
+
- spec/spec_helper.rb
|