reckon 0.5.2 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 70bc1d3d98a4ba08a3bca57069073f165e68041a5e628475b6c6076550f6e419
4
- data.tar.gz: 99daf95abf45fd4dd5549d08bb9f3816af596cd3790667ca224baf7d46053ed0
3
+ metadata.gz: 77139229b37c2dcb66ec4f8494fb9d40f036ed267cee0dad067483568b02b948
4
+ data.tar.gz: 363a124cf17848e855dede2351f06946e799ead31b2440e586d5c01ae45e63f1
5
5
  SHA512:
6
- metadata.gz: 6c0790695ec045e5210d20de85e17da49868b396ad78e334f24fdae7511969552a72df90b649b781c8b0e7ddf046cf8f84e2b47e212c576048f86798389bb449
7
- data.tar.gz: 18babc20d956315d9abed0cec59503f0859844a48e3cf5ee59d743dda487de762a9ab9787074e7553128d41834938f5eedeb2f92e5fda4fb1ab73939f81d0eb2
6
+ metadata.gz: 3473f4f80d659d8369151a4b22310159d8b4231df5a24efefd9fc426fa4d27744f2e62fb2c2d17826b0ce4c9a96ef13176ca9e602a47377b6271da49c1324cae
7
+ data.tar.gz: 323b5fe3aeafba7f04d93b91458d9982e75704c839bd763bf99371ba8f2b11f74d2b3a82a08bc57a56a1765f7a177955f441af03df63f844f0ae2804f842aacc
@@ -1 +1 @@
1
- 2.5
1
+ 2.0.0-p648
@@ -1,12 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## [v0.5.3](https://github.com/cantino/reckon/tree/v0.5.3) (2020-05-01)
4
+
5
+ [Full Changelog](https://github.com/cantino/reckon/compare/v0.5.2...v0.5.3)
6
+
7
+ **Closed issues:**
8
+
9
+ - Is reckon failing to handle comments when learning? [\#87](https://github.com/cantino/reckon/issues/87)
10
+ - \[FEATURE REQUEST\] Ask for currency of Account and output in output file in standard format of xxxx TLA for currency [\#84](https://github.com/cantino/reckon/issues/84)
11
+
3
12
  ## [v0.5.2](https://github.com/cantino/reckon/tree/v0.5.2) (2020-03-07)
4
13
 
5
14
  [Full Changelog](https://github.com/cantino/reckon/compare/v0.5.1...v0.5.2)
6
15
 
7
16
  **Closed issues:**
8
17
 
9
- - \[BUG\] Reckon appears not to be parsing ISO standard date yyyy-mm-dd? [\#85](https://github.com/cantino/reckon/issues/85)
10
18
  - \[Bug\]? Reckon fails to run on ruby 2.7.0 on Catalina [\#83](https://github.com/cantino/reckon/issues/83)
11
19
  - --account-tokens issue [\#51](https://github.com/cantino/reckon/issues/51)
12
20
 
@@ -204,15 +212,15 @@
204
212
 
205
213
  ## [v0.3.3](https://github.com/cantino/reckon/tree/v0.3.3) (2013-01-13)
206
214
 
207
- [Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.3)
215
+ [Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.3)
208
216
 
209
- ## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
217
+ ## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
210
218
 
211
- [Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.1)
219
+ [Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.2)
212
220
 
213
- ## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
221
+ ## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
214
222
 
215
- [Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.2)
223
+ [Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.1)
216
224
 
217
225
 
218
226
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- reckon (0.5.1)
4
+ reckon (0.5.2)
5
5
  chronic (>= 0.3.0)
6
6
  highline (>= 1.5.2)
7
7
  rchardet (>= 1.8.0)
data/README.md CHANGED
@@ -109,6 +109,12 @@ You can override them with `--default_outof_account` and `--default_into_account
109
109
  (if you want to have your own version, that is fine, but bump the version in a commit by itself so I can ignore it when I pull)
110
110
  * Send me a pull request. Bonus points for topic branches.
111
111
 
112
+ ## Making a release
113
+ * Update lib/reckon/version.rb
114
+ * Run `github_changelog_generator --future-release v$(egrep '"[^"]+"' -o lib/reckon/version.rb |sed -e 's/"//g') --user cantino --project reckon -t $(cat ~/.github_token)`
115
+ * Commit
116
+ * Tag the commit with the same version as in version.rb, in the form vX.XX.XX (e.g. v0.5.2)
117
+
112
118
  ## Copyright
113
119
 
114
120
  Copyright (c) 2013 Andrew Cantino. See LICENSE for details.
@@ -10,10 +10,8 @@ require 'terminal-table'
10
10
  require 'time'
11
11
  require 'logger'
12
12
 
13
- LOGGER = Logger.new(STDERR)
14
- LOGGER.level = Logger::WARN
15
-
16
13
  require_relative 'reckon/version'
14
+ require_relative 'reckon/logger'
17
15
  require_relative 'reckon/cosine_similarity'
18
16
  require_relative 'reckon/date_column'
19
17
  require_relative 'reckon/money'
@@ -3,118 +3,120 @@ require 'set'
3
3
 
4
4
  # Implementation of cosine similarity using TF-IDF for vectorization.
5
5
  # Used to suggest which account a transaction should be assigned to
6
- class CosineSimilarity
7
- def initialize(options)
8
- @options = options
9
- @tokens = {}
10
- @accounts = Hash.new(0)
11
- end
12
-
13
- def add_document(account, doc)
14
- tokenize(doc).each do |n|
15
- (token, count) = n
16
-
17
- @tokens[token] ||= {}
18
- @tokens[token][account] ||= 0
19
- @tokens[token][account] += count
20
- @accounts[account] += count
6
+ module Reckon
7
+ class CosineSimilarity
8
+ def initialize(options)
9
+ @options = options
10
+ @tokens = {}
11
+ @accounts = Hash.new(0)
21
12
  end
22
- end
23
-
24
- # find most similar documents to query
25
- def find_similar(query)
26
- (query_scores, corpus_scores) = td_idf_scores_for(query)
27
13
 
28
- query_vector = Vector.elements(query_scores, false)
14
+ def add_document(account, doc)
15
+ tokenize(doc).each do |n|
16
+ (token, count) = n
29
17
 
30
- # For each doc, calculate the similarity to the query
31
- suggestions = corpus_scores.map do |account, scores|
32
- acct_vector = Vector.elements(scores, false)
18
+ @tokens[token] ||= {}
19
+ @tokens[token][account] ||= 0
20
+ @tokens[token][account] += count
21
+ @accounts[account] += count
22
+ end
23
+ end
33
24
 
34
- acct_query_dp = acct_vector.inner_product(query_vector)
35
- # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
36
- # exactly opposite
37
- # see https://en.wikipedia.org/wiki/Cosine_similarity
38
- # cos(theta) = (A . B) / (||A|| ||B||)
39
- # where A . B is the "dot product" and ||A|| is the magnitude of A
40
- # ruby has the 'matrix' library we can use to do these calculations.
41
- {
42
- similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
43
- account: account,
44
- }
45
- end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
25
+ # find most similar documents to query
26
+ def find_similar(query)
27
+ (query_scores, corpus_scores) = td_idf_scores_for(query)
46
28
 
47
- LOGGER.info "most similar accounts: #{suggestions}"
29
+ query_vector = Vector.elements(query_scores, false)
48
30
 
49
- return suggestions
50
- end
31
+ # For each doc, calculate the similarity to the query
32
+ suggestions = corpus_scores.map do |account, scores|
33
+ acct_vector = Vector.elements(scores, false)
51
34
 
52
- private
35
+ acct_query_dp = acct_vector.inner_product(query_vector)
36
+ # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
37
+ # exactly opposite
38
+ # see https://en.wikipedia.org/wiki/Cosine_similarity
39
+ # cos(theta) = (A . B) / (||A|| ||B||)
40
+ # where A . B is the "dot product" and ||A|| is the magnitude of A
41
+ # ruby has the 'matrix' library we can use to do these calculations.
42
+ {
43
+ similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
44
+ account: account,
45
+ }
46
+ end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
53
47
 
54
- def td_idf_scores_for(query)
55
- query_tokens = tokenize(query)
56
- corpus = Set.new
57
- corpus_scores = {}
58
- query_scores = []
59
- num_docs = @accounts.length
48
+ LOGGER.info "most similar accounts: #{suggestions}"
60
49
 
61
- query_tokens.each do |n|
62
- (token, _count) = n
63
- next unless @tokens[token]
64
- corpus = corpus.union(Set.new(@tokens[token].keys))
50
+ return suggestions
65
51
  end
66
52
 
67
- query_tokens.each do |n|
68
- (token, count) = n
53
+ private
69
54
 
70
- # if no other docs have token, ignore it
71
- next unless @tokens[token]
55
+ def td_idf_scores_for(query)
56
+ query_tokens = tokenize(query)
57
+ corpus = Set.new
58
+ corpus_scores = {}
59
+ query_scores = []
60
+ num_docs = @accounts.length
61
+
62
+ query_tokens.each do |n|
63
+ (token, _count) = n
64
+ next unless @tokens[token]
65
+ corpus = corpus.union(Set.new(@tokens[token].keys))
66
+ end
72
67
 
73
- ## First, calculate scores for our query as we're building scores for the corpus
74
- query_scores << calc_tf_idf(
75
- count,
76
- query_tokens.length,
77
- @tokens[token].length,
78
- num_docs
79
- )
68
+ query_tokens.each do |n|
69
+ (token, count) = n
80
70
 
81
- ## Next, calculate for the corpus, where our "account" is a document
82
- corpus.each do |account|
83
- corpus_scores[account] ||= []
71
+ # if no other docs have token, ignore it
72
+ next unless @tokens[token]
84
73
 
85
- corpus_scores[account] << calc_tf_idf(
86
- (@tokens[token][account] || 0),
87
- @accounts[account].to_f,
88
- @tokens[token].length.to_f,
74
+ ## First, calculate scores for our query as we're building scores for the corpus
75
+ query_scores << calc_tf_idf(
76
+ count,
77
+ query_tokens.length,
78
+ @tokens[token].length,
89
79
  num_docs
90
80
  )
81
+
82
+ ## Next, calculate for the corpus, where our "account" is a document
83
+ corpus.each do |account|
84
+ corpus_scores[account] ||= []
85
+
86
+ corpus_scores[account] << calc_tf_idf(
87
+ (@tokens[token][account] || 0),
88
+ @accounts[account].to_f,
89
+ @tokens[token].length.to_f,
90
+ num_docs
91
+ )
92
+ end
91
93
  end
94
+ [query_scores, corpus_scores]
92
95
  end
93
- [query_scores, corpus_scores]
94
- end
95
96
 
96
- def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
97
+ def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
97
98
 
98
- # tf(t,d) = count of t in d / number of words in d
99
- tf = token_count / num_words_in_doc.to_f
99
+ # tf(t,d) = count of t in d / number of words in d
100
+ tf = token_count / num_words_in_doc.to_f
100
101
 
101
- # smooth idf weight
102
- # see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
103
- # df(t) = num of documents with term t in them
104
- # idf(t) = log(N/(1 + df )) + 1
105
- idf = Math.log(num_docs.to_f / (1 + df)) + 1
102
+ # smooth idf weight
103
+ # see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
104
+ # df(t) = num of documents with term t in them
105
+ # idf(t) = log(N/(1 + df )) + 1
106
+ idf = Math.log(num_docs.to_f / (1 + df)) + 1
106
107
 
107
- tf * idf
108
- end
108
+ tf * idf
109
+ end
109
110
 
110
- def tokenize(str)
111
- mk_tokens(str).inject(Hash.new(0)) do |memo, n|
112
- memo[n] += 1
113
- memo
114
- end.to_a
115
- end
116
- end
111
+ def tokenize(str)
112
+ mk_tokens(str).inject(Hash.new(0)) do |memo, n|
113
+ memo[n] += 1
114
+ memo
115
+ end.to_a
116
+ end
117
117
 
118
- def mk_tokens(str)
119
- str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
118
+ def mk_tokens(str)
119
+ str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
120
+ end
121
+ end
120
122
  end
@@ -121,8 +121,13 @@ module Reckon
121
121
  def parse(ledger)
122
122
  @entries = []
123
123
  new_entry = {}
124
+ in_comment = false
124
125
  ledger.strip.split("\n").each do |entry|
125
- next if entry =~ /^\s*$/ || entry =~ /^\s*;/
126
+ # strip comment lines
127
+ in_comment = true if entry == 'comment'
128
+ in_comment = false if entry == 'end comment'
129
+ next if in_comment
130
+ next if entry =~ /^\s*$/ || entry =~ /^[;#%|*]/
126
131
 
127
132
  # (date, type, code, description), type and code are optional
128
133
  if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
@@ -135,6 +140,7 @@ module Reckon
135
140
  accounts: []
136
141
  }
137
142
  elsif new_entry[:date] && entry =~ /^\s+/
143
+ LOGGER.info("Adding new account #{entry}")
138
144
  new_entry[:accounts] << parse_account_line(entry)
139
145
  else
140
146
  LOGGER.info("Unknown entry type: #{entry}")
@@ -0,0 +1,4 @@
1
+ module Reckon
2
+ LOGGER = Logger.new(STDERR)
3
+ LOGGER.level = Logger::WARN
4
+ end
@@ -1,3 +1,3 @@
1
1
  module Reckon
2
- VERSION = "0.5.2"
2
+ VERSION = "0.5.3"
3
3
  end
@@ -28,6 +28,7 @@ describe Reckon::LedgerParser do
28
28
  sized(15){string}.tr(%q{'`:*\\},'').gsub(/\s+/, ' ').gsub(/^[!;<\[( ]+/, '')
29
29
  end
30
30
  currency = choose(*currencies) # to be consistent within the transaction
31
+ single_line_comments = ";#|%*".split('').map { |n| "#{n} #{call(description)}" }
31
32
  comments = ['', '; ', "\t;#{call(description)}", " ; #{call(description)}"]
32
33
  date = Time.at(range(0, 1_581_389_644)).strftime(choose(*formats))
33
34
  codes = [' ', " (#{string(:alnum).tr('()', '')}) "]
@@ -48,6 +49,7 @@ describe Reckon::LedgerParser do
48
49
  ledger += "#{call(account_line)}\n"
49
50
  end
50
51
  ledger += "#{call(account)}\n"
52
+ ledger += choose(*single_line_comments) + "\n"
51
53
  ledger
52
54
  end
53
55
  end.check(1000) do |s|
@@ -57,14 +59,34 @@ describe Reckon::LedgerParser do
57
59
  ledger_csv = `echo #{safe_s} | ledger csv --date-format '%Y-%m-%d' -f - `
58
60
  ledger_parser_csv = Reckon::LedgerParser.new(s, date_format: '%Y/%m/%d').to_csv.join("\n")
59
61
 
60
- expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map &filter_format
61
- actual = CSV.parse(ledger_parser_csv, headers: headers).map &filter_format
62
+ expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map(&filter_format)
63
+ actual = CSV.parse(ledger_parser_csv, headers: headers).map(&filter_format)
62
64
  expected.length.times do |i|
63
65
  expect(actual[i]).to eq(expected[i])
64
66
  end
65
67
  end
66
68
  end
67
69
 
70
+ it 'should filter block comments' do
71
+ ledger = <<HERE
72
+ 1970/11/01 Dinner should show up
73
+ Assets:Checking -123.00
74
+ Expenses:Restaurants
75
+
76
+ comment
77
+
78
+ 1970/11/01 Lunch should NOT show up
79
+ Assets:Checking -12.00
80
+ Expenses:Restaurants
81
+
82
+ end comment
83
+ HERE
84
+ l = Reckon::LedgerParser.new(ledger)
85
+ expect(l.entries.length).to eq(1)
86
+ expect(l.entries.first[:desc]).to eq('Dinner should show up')
87
+
88
+ end
89
+
68
90
  it "should ignore non-standard entries" do
69
91
  @ledger.entries.length.should == 7
70
92
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: reckon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Cantino
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2020-03-07 00:00:00.000000000 Z
13
+ date: 2020-05-02 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
@@ -137,6 +137,7 @@ files:
137
137
  - lib/reckon/csv_parser.rb
138
138
  - lib/reckon/date_column.rb
139
139
  - lib/reckon/ledger_parser.rb
140
+ - lib/reckon/logger.rb
140
141
  - lib/reckon/money.rb
141
142
  - lib/reckon/version.rb
142
143
  - reckon.gemspec
@@ -196,9 +197,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
196
197
  - !ruby/object:Gem::Version
197
198
  version: '0'
198
199
  requirements: []
199
- rubygems_version: 3.0.6
200
+ rubygems_version: 3.0.3
200
201
  signing_key:
201
202
  specification_version: 4
202
203
  summary: Utility for interactively converting and labeling CSV files for the Ledger
203
204
  accounting tool.
204
- test_files: []
205
+ test_files:
206
+ - spec/data_fixtures/51-sample.csv
207
+ - spec/data_fixtures/51-tokens.yml
208
+ - spec/data_fixtures/73-sample.csv
209
+ - spec/data_fixtures/73-tokens.yml
210
+ - spec/data_fixtures/73-transactions.ledger
211
+ - spec/data_fixtures/85-date-example.csv
212
+ - spec/data_fixtures/austrian_example.csv
213
+ - spec/data_fixtures/bom_utf8_file.csv
214
+ - spec/data_fixtures/broker_canada_example.csv
215
+ - spec/data_fixtures/chase.csv
216
+ - spec/data_fixtures/danish_kroner_nordea_example.csv
217
+ - spec/data_fixtures/english_date_example.csv
218
+ - spec/data_fixtures/extratofake.csv
219
+ - spec/data_fixtures/french_example.csv
220
+ - spec/data_fixtures/german_date_example.csv
221
+ - spec/data_fixtures/harder_date_example.csv
222
+ - spec/data_fixtures/ing.csv
223
+ - spec/data_fixtures/intuit_mint_example.csv
224
+ - spec/data_fixtures/invalid_header_example.csv
225
+ - spec/data_fixtures/inversed_credit_card.csv
226
+ - spec/data_fixtures/nationwide.csv
227
+ - spec/data_fixtures/simple.csv
228
+ - spec/data_fixtures/some_other.csv
229
+ - spec/data_fixtures/spanish_date_example.csv
230
+ - spec/data_fixtures/suntrust.csv
231
+ - spec/data_fixtures/test_money_column.csv
232
+ - spec/data_fixtures/tokens.yaml
233
+ - spec/data_fixtures/two_money_columns.csv
234
+ - spec/data_fixtures/yyyymmdd_date_example.csv
235
+ - spec/reckon/app_spec.rb
236
+ - spec/reckon/csv_parser_spec.rb
237
+ - spec/reckon/date_column_spec.rb
238
+ - spec/reckon/ledger_parser_spec.rb
239
+ - spec/reckon/money_column_spec.rb
240
+ - spec/reckon/money_spec.rb
241
+ - spec/spec.opts
242
+ - spec/spec_helper.rb