RubyGems - reckon - Versions diffs - 0.5.2 → 0.5.3 - Mend

reckon 0.5.2 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

checksums.yaml +4 -4
data/.ruby-version +1 -1
data/CHANGELOG.md +14 -6
data/Gemfile.lock +1 -1
data/README.md +6 -0
data/lib/reckon.rb +1 -3
data/lib/reckon/cosine_similarity.rb +91 -89
data/lib/reckon/ledger_parser.rb +7 -1
data/lib/reckon/logger.rb +4 -0
data/lib/reckon/version.rb +1 -1
data/spec/reckon/ledger_parser_spec.rb +24 -2
metadata +42 -4

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 70bc1d3d98a4ba08a3bca57069073f165e68041a5e628475b6c6076550f6e419
-  data.tar.gz: 99daf95abf45fd4dd5549d08bb9f3816af596cd3790667ca224baf7d46053ed0
+  metadata.gz: 77139229b37c2dcb66ec4f8494fb9d40f036ed267cee0dad067483568b02b948
+  data.tar.gz: 363a124cf17848e855dede2351f06946e799ead31b2440e586d5c01ae45e63f1
 SHA512:
-  metadata.gz: 6c0790695ec045e5210d20de85e17da49868b396ad78e334f24fdae7511969552a72df90b649b781c8b0e7ddf046cf8f84e2b47e212c576048f86798389bb449
-  data.tar.gz: 18babc20d956315d9abed0cec59503f0859844a48e3cf5ee59d743dda487de762a9ab9787074e7553128d41834938f5eedeb2f92e5fda4fb1ab73939f81d0eb2
+  metadata.gz: 3473f4f80d659d8369151a4b22310159d8b4231df5a24efefd9fc426fa4d27744f2e62fb2c2d17826b0ce4c9a96ef13176ca9e602a47377b6271da49c1324cae
+  data.tar.gz: 323b5fe3aeafba7f04d93b91458d9982e75704c839bd763bf99371ba8f2b11f74d2b3a82a08bc57a56a1765f7a177955f441af03df63f844f0ae2804f842aacc

data/.ruby-version CHANGED

@@ -1 +1 @@
-2.5
+2.0.0-p648

data/CHANGELOG.md CHANGED

@@ -1,12 +1,20 @@
 # Changelog
+## [v0.5.3](https://github.com/cantino/reckon/tree/v0.5.3) (2020-05-01)
+[Full Changelog](https://github.com/cantino/reckon/compare/v0.5.2...v0.5.3)
+**Closed issues:**
+- Is reckon failing to handle comments when learning? [\#87](https://github.com/cantino/reckon/issues/87)
+- \[FEATURE REQUEST\] Ask for currency of Account and output in output file in standard format of xxxx TLA for currency [\#84](https://github.com/cantino/reckon/issues/84)
 ## [v0.5.2](https://github.com/cantino/reckon/tree/v0.5.2) (2020-03-07)
 [Full Changelog](https://github.com/cantino/reckon/compare/v0.5.1...v0.5.2)
 **Closed issues:**
-- \[BUG\] Reckon appears not to be parsing ISO standard date yyyy-mm-dd? [\#85](https://github.com/cantino/reckon/issues/85)
 - \[Bug\]? Reckon fails to run on ruby 2.7.0 on Catalina  [\#83](https://github.com/cantino/reckon/issues/83)
 - --account-tokens issue [\#51](https://github.com/cantino/reckon/issues/51)
@@ -204,15 +212,15 @@
 ## [v0.3.3](https://github.com/cantino/reckon/tree/v0.3.3) (2013-01-13)
-[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.3)
+[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.3)
-## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
+## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
-[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.2...v0.3.1)
+[Full Changelog](https://github.com/cantino/reckon/compare/v0.3.1...v0.3.2)
-## [v0.3.2](https://github.com/cantino/reckon/tree/v0.3.2) (2012-07-30)
+## [v0.3.1](https://github.com/cantino/reckon/tree/v0.3.1) (2012-07-30)
-[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.2)
+[Full Changelog](https://github.com/cantino/reckon/compare/5c07bea3fe63f9b909b4b76bd49f22fd8faf7a29...v0.3.1)

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    reckon (0.5.1)
+    reckon (0.5.2)
       chronic (>= 0.3.0)
       highline (>= 1.5.2)
       rchardet (>= 1.8.0)

data/README.md CHANGED

@@ -109,6 +109,12 @@ You can override them with `--default_outof_account` and `--default_into_account
   (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
 * Send me a pull request. Bonus points for topic branches.
+## Making a release
+* Update lib/reckon/version.rb
+* Run `github_changelog_generator --future-release v$(egrep '"[^"]+"' -o lib/reckon/version.rb |sed -e 's/"//g') --user cantino --project reckon -t $(cat ~/.github_token)`
+* Commit
+* Tag the commit same as in version.rb vX.XX.XX (ex v0.5.2)
 ## Copyright
 Copyright (c) 2013 Andrew Cantino. See LICENSE for details.

data/lib/reckon.rb CHANGED

@@ -10,10 +10,8 @@ require 'terminal-table'
 require 'time'
 require 'logger'
-LOGGER = Logger.new(STDERR)
-LOGGER.level = Logger::WARN
 require_relative 'reckon/version'
+require_relative 'reckon/logger'
 require_relative 'reckon/cosine_similarity'
 require_relative 'reckon/date_column'
 require_relative 'reckon/money'

data/lib/reckon/cosine_similarity.rb CHANGED

@@ -3,118 +3,120 @@ require 'set'
 # Implementation of consine similarity using TF-IDF for vectorization.
 # Used to suggest which account a transaction should be assigned to
-class CosineSimilarity
-  def initialize(options)
-    @options = options
-    @tokens = {}
-    @accounts = Hash.new(0)
-  end
-  def add_document(account, doc)
-    tokenize(doc).each do |n|
-      (token, count) = n
-      @tokens[token] ||= {}
-      @tokens[token][account] ||= 0
-      @tokens[token][account] += count
-      @accounts[account] += count
+module Reckon
+  class CosineSimilarity
+    def initialize(options)
+      @options = options
+      @tokens = {}
+      @accounts = Hash.new(0)
     end
-  end
-  # find most similar documents to query
-  def find_similar(query)
-    (query_scores, corpus_scores) = td_idf_scores_for(query)
-    query_vector = Vector.elements(query_scores, false)
+    def add_document(account, doc)
+      tokenize(doc).each do |n|
+        (token, count) = n
-    # For each doc, calculate the similarity to the query
-    suggestions = corpus_scores.map do |account, scores|
-      acct_vector = Vector.elements(scores, false)
+        @tokens[token] ||= {}
+        @tokens[token][account] ||= 0
+        @tokens[token][account] += count
+        @accounts[account] += count
+      end
+    end
-      acct_query_dp = acct_vector.inner_product(query_vector)
-      # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
-      # exactly opposite
-      # see https://en.wikipedia.org/wiki/Cosine_similarity
-      # cos(theta) = (A . B) / (||A|| ||B||)
-      # where A . B is the "dot product" and ||A|| is the magnitude of A
-      # ruby has the 'matrix' library we can use to do these calculations.
-      {
-        similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
-        account: account,
-      }
-    end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
+    # find most similar documents to query
+    def find_similar(query)
+      (query_scores, corpus_scores) = td_idf_scores_for(query)
-    LOGGER.info "most similar accounts: #{suggestions}"
+      query_vector = Vector.elements(query_scores, false)
-    return suggestions
-  end
+      # For each doc, calculate the similarity to the query
+      suggestions = corpus_scores.map do |account, scores|
+        acct_vector = Vector.elements(scores, false)
-  private
+        acct_query_dp = acct_vector.inner_product(query_vector)
+        # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
+        # exactly opposite
+        # see https://en.wikipedia.org/wiki/Cosine_similarity
+        # cos(theta) = (A . B) / (||A|| ||B||)
+        # where A . B is the "dot product" and ||A|| is the magnitude of A
+        # ruby has the 'matrix' library we can use to do these calculations.
+        {
+          similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
+          account: account,
+        }
+      end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
-  def td_idf_scores_for(query)
-    query_tokens = tokenize(query)
-    corpus = Set.new
-    corpus_scores = {}
-    query_scores = []
-    num_docs = @accounts.length
+      LOGGER.info "most similar accounts: #{suggestions}"
-    query_tokens.each do |n|
-      (token, _count) = n
-      next unless @tokens[token]
-      corpus = corpus.union(Set.new(@tokens[token].keys))
+      return suggestions
     end
-    query_tokens.each do |n|
-      (token, count) = n
+    private
-      # if no other docs have token, ignore it
-      next unless @tokens[token]
+    def td_idf_scores_for(query)
+      query_tokens = tokenize(query)
+      corpus = Set.new
+      corpus_scores = {}
+      query_scores = []
+      num_docs = @accounts.length
+      query_tokens.each do |n|
+        (token, _count) = n
+        next unless @tokens[token]
+        corpus = corpus.union(Set.new(@tokens[token].keys))
+      end
-      ## First, calculate scores for our query as we're building scores for the corpus
-      query_scores << calc_tf_idf(
-        count,
-        query_tokens.length,
-        @tokens[token].length,
-        num_docs
-      )
+      query_tokens.each do |n|
+        (token, count) = n
-      ## Next, calculate for the corpus, where our "account" is a document
-      corpus.each do |account|
-        corpus_scores[account] ||= []
+        # if no other docs have token, ignore it
+        next unless @tokens[token]
-        corpus_scores[account] << calc_tf_idf(
-          (@tokens[token][account] || 0),
-          @accounts[account].to_f,
-          @tokens[token].length.to_f,
+        ## First, calculate scores for our query as we're building scores for the corpus
+        query_scores << calc_tf_idf(
+          count,
+          query_tokens.length,
+          @tokens[token].length,
           num_docs
         )
+        ## Next, calculate for the corpus, where our "account" is a document
+        corpus.each do |account|
+          corpus_scores[account] ||= []
+          corpus_scores[account] << calc_tf_idf(
+            (@tokens[token][account] || 0),
+            @accounts[account].to_f,
+            @tokens[token].length.to_f,
+            num_docs
+          )
+        end
       end
+      [query_scores, corpus_scores]
     end
-    [query_scores, corpus_scores]
-  end
-  def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
+    def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
-    # tf(t,d) = count of t in d / number of words in d
-    tf = token_count / num_words_in_doc.to_f
+      # tf(t,d) = count of t in d / number of words in d
+      tf = token_count / num_words_in_doc.to_f
-    # smooth idf weight
-    # see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
-    # df(t) = num of documents with term t in them
-    # idf(t) = log(N/(1 + df )) + 1
-    idf = Math.log(num_docs.to_f / (1 + df)) + 1
+      # smooth idf weight
+      # see https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency_2
+      # df(t) = num of documents with term t in them
+      # idf(t) = log(N/(1 + df )) + 1
+      idf = Math.log(num_docs.to_f / (1 + df)) + 1
-    tf * idf
-  end
+      tf * idf
+    end
-  def tokenize(str)
-    mk_tokens(str).inject(Hash.new(0)) do |memo, n|
-      memo[n] += 1
-      memo
-    end.to_a
-  end
-end
+    def tokenize(str)
+      mk_tokens(str).inject(Hash.new(0)) do |memo, n|
+        memo[n] += 1
+        memo
+      end.to_a
+    end
-def mk_tokens(str)
-  str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
+    def mk_tokens(str)
+      str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
+    end
+  end
 end

data/lib/reckon/ledger_parser.rb CHANGED

@@ -121,8 +121,13 @@ module Reckon
     def parse(ledger)
       @entries = []
       new_entry = {}
+      in_comment = false
       ledger.strip.split("\n").each do |entry|
-        next if entry =~ /^\s*$/ || entry =~ /^\s*;/
+        # strip comment lines
+        in_comment = true if entry == 'comment'
+        in_comment = false if entry == 'end comment'
+        next if in_comment
+        next if entry =~ /^\s*$/ || entry =~ /^[;#%|*]/
         # (date, type, code, description), type and code are optional
         if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
@@ -135,6 +140,7 @@ module Reckon
             accounts: []
           }
         elsif new_entry[:date] && entry =~ /^\s+/
+          LOGGER.info("Adding new account #{entry}")
           new_entry[:accounts] << parse_account_line(entry)
         else
           LOGGER.info("Unknown entry type: #{entry}")

data/lib/reckon/logger.rb ADDED

@@ -0,0 +1,4 @@
+module Reckon
+  LOGGER = Logger.new(STDERR)
+  LOGGER.level = Logger::WARN
+end

data/lib/reckon/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Reckon
-  VERSION = "0.5.2"
+  VERSION = "0.5.3"
 end

data/spec/reckon/ledger_parser_spec.rb CHANGED

@@ -28,6 +28,7 @@ describe Reckon::LedgerParser do
             sized(15){string}.tr(%q{'`:*\\},'').gsub(/\s+/, ' ').gsub(/^[!;<\[( ]+/, '')
           end
           currency = choose(*currencies) # to be consistent within the transaction
+          single_line_comments = ";#|%*".split('').map { |n| "#{n} #{call(description)}" }
           comments = ['', ';   ', "\t;#{call(description)}", "  ; #{call(description)}"]
           date = Time.at(range(0, 1_581_389_644)).strftime(choose(*formats))
           codes = [' ', " (#{string(:alnum).tr('()', '')}) "]
@@ -48,6 +49,7 @@ describe Reckon::LedgerParser do
             ledger += "#{call(account_line)}\n"
           end
           ledger += "#{call(account)}\n"
+          ledger += choose(*single_line_comments) + "\n"
           ledger
         end
       end.check(1000) do |s|
@@ -57,14 +59,34 @@ describe Reckon::LedgerParser do
         ledger_csv = `echo #{safe_s} | ledger csv --date-format '%Y-%m-%d' -f - `
         ledger_parser_csv = Reckon::LedgerParser.new(s, date_format: '%Y/%m/%d').to_csv.join("\n")
-        expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map &filter_format
-        actual = CSV.parse(ledger_parser_csv, headers: headers).map &filter_format
+        expected = CSV.parse(ledger_csv.gsub('\"', '""'), headers: headers).map(&filter_format)
+        actual = CSV.parse(ledger_parser_csv, headers: headers).map(&filter_format)
         expected.length.times do |i|
           expect(actual[i]).to eq(expected[i])
         end
       end
     end
+    it 'should filter block comments' do
+      ledger = <<HERE
+1970/11/01 Dinner should show up
+  Assets:Checking  -123.00
+  Expenses:Restaurants
+comment
+1970/11/01 Lunch should NOT show up
+  Assets:Checking  -12.00
+  Expenses:Restaurants
+end comment
+HERE
+      l = Reckon::LedgerParser.new(ledger)
+      expect(l.entries.length).to eq(1)
+      expect(l.entries.first[:desc]).to eq('Dinner should show up')
+    end
     it "should ignore non-standard entries" do
       @ledger.entries.length.should == 7
     end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: reckon
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.3
 platform: ruby
 authors:
 - Andrew Cantino
@@ -10,7 +10,7 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2020-03-07 00:00:00.000000000 Z
+date: 2020-05-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rspec
@@ -137,6 +137,7 @@ files:
 - lib/reckon/csv_parser.rb
 - lib/reckon/date_column.rb
 - lib/reckon/ledger_parser.rb
+- lib/reckon/logger.rb
 - lib/reckon/money.rb
 - lib/reckon/version.rb
 - reckon.gemspec
@@ -196,9 +197,46 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.6
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: Utility for interactively converting and labeling CSV files for the Ledger
   accounting tool.
-test_files: []
+test_files:
+- spec/data_fixtures/51-sample.csv
+- spec/data_fixtures/51-tokens.yml
+- spec/data_fixtures/73-sample.csv
+- spec/data_fixtures/73-tokens.yml
+- spec/data_fixtures/73-transactions.ledger
+- spec/data_fixtures/85-date-example.csv
+- spec/data_fixtures/austrian_example.csv
+- spec/data_fixtures/bom_utf8_file.csv
+- spec/data_fixtures/broker_canada_example.csv
+- spec/data_fixtures/chase.csv
+- spec/data_fixtures/danish_kroner_nordea_example.csv
+- spec/data_fixtures/english_date_example.csv
+- spec/data_fixtures/extratofake.csv
+- spec/data_fixtures/french_example.csv
+- spec/data_fixtures/german_date_example.csv
+- spec/data_fixtures/harder_date_example.csv
+- spec/data_fixtures/ing.csv
+- spec/data_fixtures/intuit_mint_example.csv
+- spec/data_fixtures/invalid_header_example.csv
+- spec/data_fixtures/inversed_credit_card.csv
+- spec/data_fixtures/nationwide.csv
+- spec/data_fixtures/simple.csv
+- spec/data_fixtures/some_other.csv
+- spec/data_fixtures/spanish_date_example.csv
+- spec/data_fixtures/suntrust.csv
+- spec/data_fixtures/test_money_column.csv
+- spec/data_fixtures/tokens.yaml
+- spec/data_fixtures/two_money_columns.csv
+- spec/data_fixtures/yyyymmdd_date_example.csv
+- spec/reckon/app_spec.rb
+- spec/reckon/csv_parser_spec.rb
+- spec/reckon/date_column_spec.rb
+- spec/reckon/ledger_parser_spec.rb
+- spec/reckon/money_column_spec.rb
+- spec/reckon/money_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb