RubyGems - reckon - Versions diffs - 0.9.0 → 0.9.1 - Mend

reckon 0.9.0 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

checksums.yaml +4 -4
data/.github/workflows/ruby.yml +2 -0
data/.rubocop.yml +20 -0
data/CHANGELOG.md +10 -0
data/Gemfile.lock +1 -1
data/Rakefile +2 -2
data/bin/build-new-version.sh +3 -2
data/bin/reckon +1 -1
data/lib/reckon/app.rb +27 -24
data/lib/reckon/beancount_parser.rb +150 -0
data/lib/reckon/cosine_similarity.rb +0 -1
data/lib/reckon/csv_parser.rb +73 -37
data/lib/reckon/date_column.rb +18 -7
data/lib/reckon/ledger_parser.rb +23 -15
data/lib/reckon/money.rb +18 -16
data/lib/reckon/options.rb +44 -19
data/lib/reckon/version.rb +1 -1
data/lib/reckon.rb +1 -0
data/spec/cosine_training_and_test.rb +1 -1
data/spec/data_fixtures/multi-line-field.csv +5 -0
data/spec/integration/invalid_header_example/output.ledger +6 -7
data/spec/integration/invalid_header_example/test_args +1 -1
data/spec/integration/tab_delimited_file/input.csv +2 -0
data/spec/integration/tab_delimited_file/output.ledger +8 -0
data/spec/integration/tab_delimited_file/test_args +1 -0
data/spec/reckon/csv_parser_spec.rb +85 -26
data/spec/reckon/date_column_spec.rb +6 -0
data/spec/reckon/ledger_parser_spec.rb +25 -23
data/spec/reckon/options_spec.rb +2 -2
data/spec/spec_helper.rb +2 -0
metadata +8 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 26126e1e4ead4fcd93a94093f8e4a4fd557a9c46ccc6983198d4bdd516e639ee
-  data.tar.gz: f196a86ca58ebaeee4f27030bb21af9fd3c3001d20d06b74f2063ea42d96cfcc
+  metadata.gz: 03c20b48d4333969c8304a5bb9a3c01fc6053050ab9146329ce14ae6a9886b38
+  data.tar.gz: 27a2ce4e8db5c7818cc4cefb19f180a7c727190f0a990403f565fad503e749a9
 SHA512:
-  metadata.gz: 02ad471caf5a5b6b69d98cde9cfa5e0579bbe680f50bb7fa8c1f5b3a7908018a48075531674a27d23139913be9156f889cac8010e06de87439c0665b064d7171
-  data.tar.gz: a8abf375fab7ba91d31a0d05ee372a5fc788feca0fe6a61859cb4ed6c72a3387238a4df2d2dac2ce300fe4f047142ea18bbe03a364e008e2b9588143f0223852
+  metadata.gz: 2f569b3d5cf4038714065a6d184d6c07f57d10598e5efc610eeb9919e8b18c65aff5e5329ab89a9ed30f72cabce9d11f5645af4d0df3bda6d05ad9afd988f7e7
+  data.tar.gz: 1783a63ba138c2b87a0756d6b9bcfbce068daf977e582a4c920a37ff50358328f8514f308dbbf932ef5cc4111e9e52dadfaed5876b9d30f4759d4a1eb31299fa

data/.github/workflows/ruby.yml CHANGED Viewed

@@ -9,7 +9,9 @@ name: Build Status
 on:
   push:
+    branches: [ master ]
   pull_request:
+    branches: [ master ]
 jobs:
   test:

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,20 @@
+Layout/LineLength:
+  Max: 88
+Style/StringLiterals:
+  Enabled: false
+Style/RedundantReturn:
+  Enabled: false
+Metrics/ClassLength:
+  Enabled: False
+Metrics/MethodLength:
+  Enabled: False
+Metrics/AbcSize:
+  Enabled: False
+Style/NumericPredicate:
+  Enabled: False

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,15 @@
 # Changelog
+## [v0.9.1](https://github.com/cantino/reckon/tree/v0.9.1) (2023-03-19)
+[Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0...v0.9.1)
+**Closed issues:**
+- More than one column support [\#120](https://github.com/cantino/reckon/issues/120)
+- Beancount support [\#119](https://github.com/cantino/reckon/issues/119)
+- Problem with importing CSV [\#60](https://github.com/cantino/reckon/issues/60)
 ## [v0.9.0](https://github.com/cantino/reckon/tree/v0.9.0) (2023-02-23)
 [Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0-beta...v0.9.0)

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    reckon (0.9.0)
+    reckon (0.9.1)
       chronic (>= 0.3.0)
       highline (>= 1.5.2)
       matrix (>= 0.4.2)

data/Rakefile CHANGED Viewed

@@ -13,10 +13,10 @@ task :test_all do
   puts "Running unit tests"
   Rake::Task["spec"].invoke
   puts "Running integration tests"
-  Rake::Task["integration_tests"].invoke
+  Rake::Task["test_integration"].invoke
 end
-task :integration_tests do
+task :test_integration do
   cmd = 'prove -v ./spec/integration/test.sh'
   raise 'Integration tests failed' unless system(cmd)
 end

data/bin/build-new-version.sh CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/bin/bash
-set -e
+set -xe
 VERSION=$1
@@ -8,7 +8,7 @@ echo "Install github_changelog_generator"
 gem install --user github_changelog_generator
 echo "Update 'lib/reckon/version.rb'"
-echo -e "module Reckon\n  VERSION=\"$VERSION\"\nend" > lib/reckon/version.rb
+echo -e "module Reckon\n  VERSION = \"$VERSION\"\nend" > lib/reckon/version.rb
 echo "Run `bundle install` to build updated Gemfile.lock"
 bundle install
 echo "Run changelog generator (requires $TOKEN to be your github token)"
@@ -24,3 +24,4 @@ echo "Push changes and tags"
 echo "git push && git push --tags"
 echo "Push new gem"
 echo "gem push reckon-$VERSION.gem"
+gh release create v$VERSION reckon-$VERSION.gem --draft --generate-notes

data/bin/reckon CHANGED Viewed

@@ -4,7 +4,7 @@ require 'rubygems'
 require 'reckon'
 begin
-  options = Reckon::Options.parse
+  options = Reckon::Options.parse_command_line_options
 rescue RuntimeError => e
   puts("ERROR: #{e}")
   exit(1)

data/lib/reckon/app.rb CHANGED Viewed

@@ -1,12 +1,12 @@
-# coding: utf-8
+# frozen_string_literal: true
-require 'pp'
 require 'yaml'
+require 'stringio'
 module Reckon
+  # The main app
   class App
     attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
-    @@cli = HighLine.new
     def initialize(opts = {})
       self.options = opts
@@ -14,9 +14,10 @@ module Reckon
       self.regexps = {}
       self.seen = Set.new
-      self.options[:currency] ||= '$'
-      @csv_parser = CSVParser.new( options )
+      @cli = HighLine.new
+      @csv_parser = CSVParser.new(options)
       @matcher = CosineSimilarity.new(options)
+      @parser = options[:format] =~ /beancount/i ? BeancountParser.new : LedgerParser.new
       learn!
     end
@@ -26,9 +27,13 @@ module Reckon
       fh.puts str
     end
+    # Learn from previous transactions. Used to recommend accounts for a transaction.
     def learn!
       learn_from_account_tokens(options[:account_tokens_file])
       learn_from_ledger_file(options[:existing_ledger_file])
+      # TODO: make this work
+      # this doesn't work because output_file is an IO object
+      # learn_from_ledger_file(options[:output_file]) if File.exist?(options[:output_file])
     end
     def learn_from_account_tokens(filename)
@@ -52,12 +57,13 @@ module Reckon
       raise "#{ledger_file} doesn't exist!" unless File.exist?(ledger_file)
-      learn_from_ledger(File.read(ledger_file))
+      learn_from_ledger(File.new(ledger_file))
     end
+    # Takes an IO-like object
     def learn_from_ledger(ledger)
       LOGGER.info "learning from #{ledger}"
-      LedgerParser.new(ledger).entries.each do |entry|
+      @parser.parse(ledger).each do |entry|
         entry[:accounts].each do |account|
           str = [entry[:desc], account[:amount]].join(" ")
           if account[:name] != options[:bank_account]
@@ -84,7 +90,7 @@ module Reckon
           merged_acct = [account, k].compact.join(':')
           extract_account_tokens(v, merged_acct)
         end
-        at.inject({}) { |memo, e| memo.merge!(e)}
+        at.inject({}) { |memo, e| memo.merge!(e) }
       end
     end
@@ -92,6 +98,7 @@ module Reckon
       # https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
       match = regex_str.match(/^\/(.*)\/([ix]*)$/m)
       fail "failed to parse regexp #{regex_str}" unless match
       options = 0
       (match[2] || '').split('').each do |option|
         case option
@@ -120,13 +127,16 @@ module Reckon
         if row[:money] > 0
           # out_of_account
-          answer = ask_account_question("Which account provided this income? (#{cmd_options})", row)
+          answer = ask_account_question(
+            "Which account provided this income? (#{cmd_options})", row
+          )
           line1 = [options[:bank_account], row[:pretty_money]]
           line2 = [answer, ""]
         else
           # into_account
-          answer = ask_account_question("To which account did this money go? (#{cmd_options})", row)
-#          line1 = [answer, row[:pretty_money_negated]]
+          answer = ask_account_question(
+            "To which account did this money go? (#{cmd_options})", row
+          )
           line1 = [answer, ""]
           line2 = [options[:bank_account], row[:pretty_money]]
         end
@@ -137,9 +147,9 @@ module Reckon
           next
         end
-        ledger = ledger_format(row, line1, line2)
+        ledger = @parser.format_row(row, line1, line2)
         LOGGER.info "ledger line: #{ledger}"
-        learn_from_ledger(ledger) unless options[:account_tokens_file]
+        learn_from_ledger(StringIO.new(ledger)) unless options[:account_tokens_file]
         output(ledger)
       end
     end
@@ -203,7 +213,7 @@ module Reckon
         return possible_answers[0] || default
       end
-      answer = @@cli.ask(msg) do |q|
+      answer = @cli.ask(msg) do |q|
         q.completion = possible_answers
         q.readline = true
         q.default = possible_answers.first
@@ -221,7 +231,7 @@ module Reckon
     end
     def add_description(row)
-      desc_answer = @@cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
+      desc_answer = @cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
         q.overwrite = true
         q.readline = true
         q.default = row[:description]
@@ -231,7 +241,7 @@ module Reckon
     end
     def add_note(row)
-      desc_answer = @@cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
+      desc_answer = @cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
         q.overwrite = true
         q.readline = true
         q.default = row[:note]
@@ -246,7 +256,7 @@ module Reckon
           [account, match[0]]
         end
       }.compact
-      matches.sort_by! { |_account, matched_text| matched_text.length }.map(&:first)
+      matches.sort_by { |_account, matched_text| matched_text.length }.map(&:first)
     end
     def suggest(row)
@@ -254,13 +264,6 @@ module Reckon
         @matcher.find_similar(row[:description]).map { |n| n[:account] }
     end
-    def ledger_format(row, line1, line2)
-      out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
-      out += "\t#{line1.first}\t\t\t#{line1.last}\n"
-      out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
-      out
-    end
     def output(ledger_line)
       options[:output_file].puts ledger_line
       options[:output_file].flush

data/lib/reckon/beancount_parser.rb ADDED Viewed

@@ -0,0 +1,150 @@
+require 'rubygems'
+require 'date'
+module Reckon
+  class BeancountParser
+    attr_accessor :entries
+    def initialize(options = {})
+      @options = options
+      @date_format = options[:ledger_date_format] || options[:date_format] || '%Y-%m-%d'
+    end
+    # 2015-01-01 * "Opening Balance for checking account"
+    #   Assets:US:BofA:Checking                         3490.52 USD
+    #   Equity:Opening-Balances                        -3490.52 USD
+    # input is an object that responds to #each_line
+    # (e.g. a StringIO or an IO object)
+    def parse(input)
+      entries = []
+      comment_chars = ';#%*|'
+      new_entry = {}
+      input.each_line do |entry|
+        next if entry =~ /^\s*[#{comment_chars}]/
+        m = entry.match(%r{
+          ^
+          (\d+[\d/-]+)  # date
+          \s+
+          ([*!])? # type
+          \s*
+          ("[^"]*")? # description (optional)
+          \s*
+          ("[^"]*")? # notes (optional)
+          # tags (not implemented)
+        }x)
+        # captures are (date, type, description, notes); all but the date are optional
+        if (m)
+          add_entry(entries, new_entry)
+          new_entry = {
+            date: try_parse_date(m[1]),
+            type: m[2] || "",
+            desc: trim_quote(m[3]),
+            notes: trim_quote(m[4]),
+            accounts: []
+          }
+        elsif entry =~ /^\s*$/ && new_entry[:date]
+          add_entry(entries, new_entry)
+          new_entry = {}
+        elsif new_entry[:date] && entry =~ /^\s+/
+          LOGGER.info("Adding new account #{entry}")
+          new_entry[:accounts] << parse_account_line(entry)
+        else
+          LOGGER.info("Unknown entry type: #{entry}")
+          add_entry(entries, new_entry)
+          new_entry = {}
+        end
+      end
+      entries
+    end
+    def format_row(row, line1, line2)
+      out = %Q{#{row[:pretty_date]} * "#{row[:description]}" "#{row[:note]}\n}
+      out += "\t#{line1.first}\t\t\t#{line1.last}\n"
+      out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
+      out
+    end
+    private
+    # remove leading and trailing quote character (")
+    def trim_quote(str)
+      return str if !str
+      str.gsub(/^"([^"]*)"$/, '\1')
+    end
+    def add_entry(entries, entry)
+      return unless entry[:date] && entry[:accounts].length > 1
+      entry[:accounts] = balance(entry[:accounts])
+      entries << entry
+    end
+    def try_parse_date(date_str)
+      date = Date.parse(date_str)
+      return nil if date.year > 9999 || date.year < 1000
+      date
+    rescue ArgumentError
+      nil
+    end
+    def parse_account_line(entry)
+      # TODO handle buying stocks
+      #   Assets:US:ETrade:VHT                                 19 VHT {132.32 USD, 2017-08-27}
+      (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
+      if rest.nil? || rest.empty?
+        return {
+          name: account_name,
+          amount: clean_money("")
+        }
+      end
+      value = if rest =~ /{/
+                (qty, dollar_value, date) = rest.split(/[{,]/)
+                (qty.to_f * dollar_value.to_f).to_s
+              else
+                rest
+              end
+      return {
+        name: account_name,
+        amount: clean_money(value || "")
+      }
+    end
+    def balance(accounts)
+      return accounts unless accounts.any? { |i| i[:amount].nil? }
+      sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
+      count = 0
+      accounts.each do |account|
+        next unless account[:amount].nil?
+        count += 1
+        account[:amount] = -sum
+      end
+      if count > 1
+        puts "Warning: unparsable entry due to more than one missing money value."
+        p accounts
+        puts
+      end
+      accounts
+    end
+    def clean_money(money)
+      return nil if money.nil? || money.empty?
+      money.gsub(/[^0-9.-]/, '').to_f
+    end
+  end
+end

data/lib/reckon/cosine_similarity.rb CHANGED Viewed

@@ -17,7 +17,6 @@ module Reckon
     def initialize(options)
       @docs = DocumentInfo.new({}, {})
-      @options = options
     end
     def add_document(account, doc)

data/lib/reckon/csv_parser.rb CHANGED Viewed

@@ -1,32 +1,28 @@
-#coding: utf-8
+# frozen_string_literal: true
+require 'stringio'
 module Reckon
+  # Parses CSV files
   class CSVParser
-    attr_accessor :options, :csv_data, :money_column_indices, :date_column_index, :description_column_indices, :money_column, :date_column
+    attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
+                  :description_column_indices, :money_column, :date_column
     def initialize(options = {})
       self.options = options
+      self.options[:csv_separator] = "\t" if options[:csv_separator] == '\t'
       self.options[:currency] ||= '$'
+      # we convert to a string so we can do character encoding cleanup
       @csv_data = parse(options[:string] || File.read(options[:file]), options[:file])
       filter_csv
       detect_columns
     end
+    # transpose csv_data (array of rows) to an array of columns
     def columns
-      @columns ||=
-        begin
-          last_row_length = nil
-          csv_data.inject([]) do |memo, row|
-            unless row.all? { |i| i.nil? || i.length == 0 }
-              row.each_with_index do |entry, index|
-                memo[index] ||= []
-                memo[index] << (entry || '').strip
-              end
-              last_row_length = row.length
-            end
-            memo
-          end
-        end
+      @columns ||= @csv_data[0].zip(*@csv_data[1..])
     end
     def date_for(index)
@@ -34,7 +30,7 @@ module Reckon
     end
     def pretty_date_for(index)
-      @date_column.pretty_for( index )
+      @date_column.pretty_for(index)
     end
     def money_for(index)
@@ -42,7 +38,7 @@ module Reckon
     end
     def pretty_money(amount, negate = false)
-      Money.new( amount, @options ).pretty( negate )
+      Money.new(amount, @options).pretty(negate)
     end
     def pretty_money_for(index, negate = false)
@@ -54,11 +50,11 @@ module Reckon
     def description_for(index)
       description_column_indices.map { |i| columns[i][index].to_s.strip }
-        .reject(&:empty?)
-        .join("; ")
-        .squeeze(" ")
-        .gsub(/(;\s+){2,}/, '')
-        .strip
+                                .reject(&:empty?)
+                                .join("; ")
+                                .squeeze(" ")
+                                .gsub(/(;\s+){2,}/, '')
+                                .strip
     end
     def row(index)
@@ -84,9 +80,10 @@ module Reckon
         money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
         last = nil
         column.reverse.each_with_index do |entry, row_from_bottom|
+          entry ||= "" # entries can be nil
           row = csv_data[csv_data.length - 1 - row_from_bottom]
           entry = entry.strip
-          money_score += Money::likelihood( entry )
+          money_score += Money::likelihood(entry)
           possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
           possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
           date_score += DateColumn.likelihood(entry)
@@ -97,8 +94,8 @@ module Reckon
             row.each do |row_entry|
               row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
               if row_entry != 0 && last + row_entry == entry_as_num
-                 money_score -= 10
-                 break
+                money_score -= 10
+                break
               end
             end
           end
@@ -110,7 +107,8 @@ module Reckon
           found_likely_money_column = true
         end
-        results << { :index => index, :money_score => money_score, :date_score => date_score }
+        results << { :index => index, :money_score => money_score,
+                     :date_score => date_score }
       end
       results.sort_by! { |n| -n[:money_score] }
@@ -129,14 +127,15 @@ module Reckon
     # In some CSV files the negative/positive sign of an amount is given in a separate column
     def detect_sign_column
       return if columns[0].length <= 2 # This test needs requires more than two rows otherwise will lead to false positives
       signs = []
       if @money_column_indices[0] > 0
-        column = columns[ @money_column_indices[0] - 1 ]
+        column = columns[@money_column_indices[0] - 1]
         signs = column.uniq
       end
       if (signs.length != 2 &&
           (@money_column_indices[0] + 1 < columns.length))
-        column = columns[ @money_column_indices[0] + 1 ]
+        column = columns[@money_column_indices[0] + 1]
         signs = column.uniq
       end
       if signs.length == 2
@@ -166,15 +165,19 @@ module Reckon
           self.money_column_indices = [options[:money_column] - 1]
         elsif options[:money_columns].length == 2
           in_col, out_col = options[:money_columns]
-          self.money_column_indices = [in_col -1, out_col -1]
+          self.money_column_indices = [in_col - 1, out_col - 1]
         else
           puts "Unable to determine money columns, use --money-columns to specify the 1 or 2 column(s) reckon should use."
         end
       # If no money_column(s) argument is supplied, try to automatically infer money_column(s)
       else
-        self.money_column_indices = results.select { |n| n[:is_money_column] }.map { |n| n[:index] }
+        self.money_column_indices = results.select { |n|
+                                      n[:is_money_column]
+                                    }.map { |n| n[:index] }
         if self.money_column_indices.length == 1
+          # TODO: print the unfiltered column number, not the filtered
+          # ie if money column is 7, but we ignore columns 4 and 5, this prints "Using column 5 as the money column"
           puts "Using column #{money_column_indices.first + 1} as the money column.  Use --money-colum to specify a different one."
         elsif self.money_column_indices.length == 2
           puts "Using columns #{money_column_indices[0] + 1} and #{money_column_indices[1] + 1} as money column. Use --money-columns to specify different ones."
@@ -204,20 +207,53 @@ module Reckon
       self.description_column_indices = results.map { |i| i[:index] }
     end
-    def parse(data, filename=nil)
+    def parse(data, filename = nil)
       # Use force_encoding to convert the string to utf-8 with as few invalid characters
       # as possible.
       data.force_encoding(try_encoding(data, filename))
       data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
       data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists
-      rows = []
-      data.each_line.with_index do |line, i|
-        next if i < (options[:contains_header] || 0)
-        rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',')
+      separator = options[:csv_separator] || guess_column_separator(data)
+      header_lines_to_skip = options[:contains_header] || 0
+      # stored as N + 1 because it is negated into a range end: -1 skips 0 footer rows
+      footer_lines_to_skip = (options[:contains_footer] || 0) + 1
+      # convert to a stringio object to handle multi-line fields
+      parser_opts = {
+        col_sep: separator,
+        skip_blanks: true
+      }
+      begin
+        rows = CSV.parse(StringIO.new(data), **parser_opts)
+        rows[header_lines_to_skip..-footer_lines_to_skip]
+      rescue CSV::MalformedCSVError
+        # try removing N header lines before parsing
+        index = 0
+        count = 0
+        while count < header_lines_to_skip
+          index = data.index("\n", index) + 1 # skip over newline character
+          count += 1
+        end
+        rows = CSV.parse(StringIO.new(data[index..-1]), **parser_opts)
+        rows[0..-footer_lines_to_skip]
+      end
+    end
+    def guess_column_separator(data)
+      delimiters = [',', "\t", ';', ':', '|']
+      counts = [0] * delimiters.length
+      data.each_line do |line|
+        delimiters.each_with_index do |delim, i|
+          counts[i] += line.count(delim)
+        end
       end
-      rows
+      LOGGER.info("guessing #{delimiters[counts.index(counts.max)]} as csv separator")
+      delimiters[counts.index(counts.max)]
     end
     def try_encoding(data, filename = nil)