RubyGems - reckon - Versions diffs - 0.4.4 → 0.5.4 - Mend

reckon 0.4.4 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

checksums.yaml +5 -5
data/.gitignore +3 -0
data/.ruby-version +1 -1
data/.travis.yml +10 -2
data/CHANGELOG.md +235 -0
data/Gemfile +0 -1
data/Gemfile.lock +73 -15
data/README.md +12 -5
data/lib/reckon.rb +13 -12
data/lib/reckon/app.rb +94 -116
data/lib/reckon/cosine_similarity.rb +122 -0
data/lib/reckon/csv_parser.rb +116 -129
data/lib/reckon/date_column.rb +60 -0
data/lib/reckon/ledger_parser.rb +204 -30
data/lib/reckon/logger.rb +4 -0
data/lib/reckon/money.rb +6 -62
data/lib/reckon/version.rb +3 -0
data/reckon.gemspec +8 -5
data/spec/data_fixtures/51-sample.csv +8 -0
data/spec/data_fixtures/51-tokens.yml +9 -0
data/spec/data_fixtures/73-sample.csv +2 -0
data/spec/data_fixtures/73-tokens.yml +8 -0
data/spec/data_fixtures/73-transactions.ledger +7 -0
data/spec/data_fixtures/85-date-example.csv +2 -0
data/spec/data_fixtures/austrian_example.csv +13 -0
data/spec/data_fixtures/bom_utf8_file.csv +1 -0
data/spec/data_fixtures/broker_canada_example.csv +12 -0
data/spec/data_fixtures/chase.csv +9 -0
data/spec/data_fixtures/danish_kroner_nordea_example.csv +6 -0
data/spec/data_fixtures/english_date_example.csv +3 -0
data/spec/data_fixtures/french_example.csv +9 -0
data/spec/data_fixtures/german_date_example.csv +3 -0
data/spec/data_fixtures/harder_date_example.csv +5 -0
data/spec/data_fixtures/ing.csv +3 -0
data/spec/data_fixtures/intuit_mint_example.csv +7 -0
data/spec/data_fixtures/invalid_header_example.csv +6 -0
data/spec/data_fixtures/inversed_credit_card.csv +16 -0
data/spec/data_fixtures/nationwide.csv +4 -0
data/spec/data_fixtures/simple.csv +2 -0
data/spec/data_fixtures/some_other.csv +9 -0
data/spec/data_fixtures/spanish_date_example.csv +3 -0
data/spec/data_fixtures/suntrust.csv +7 -0
data/spec/data_fixtures/test_money_column.csv +3 -0
data/spec/data_fixtures/two_money_columns.csv +5 -0
data/spec/data_fixtures/yyyymmdd_date_example.csv +1 -0
data/spec/reckon/app_spec.rb +96 -34
data/spec/reckon/csv_parser_spec.rb +185 -307
data/spec/reckon/date_column_spec.rb +12 -13
data/spec/reckon/ledger_parser_spec.rb +99 -9
data/spec/reckon/money_spec.rb +42 -29
data/spec/spec_helper.rb +22 -0
metadata +85 -21
data/CHANGES.md +0 -9

data/lib/reckon/date_column.rb ADDED

@@ -0,0 +1,60 @@
+module Reckon
+  class DateColumn < Array
+    attr_accessor :endian_precedence
+    def initialize( arr = [], options = {} )
+      arr.each do |value|
+        if options[:date_format]
+          begin
+            value = Date.strptime(value, options[:date_format])
+          rescue
+            puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
+            exit 1
+          end
+        else
+          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
+          value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/            # german format
+          value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/            # nordea format
+          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/            # yyyy-mm-dd format
+          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/                 # yyyymmdd format
+          unless @endian_precedence # Try to detect endian_precedence
+            reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
+            # If first one is not \d\d/\d\d/\d\d\d?\d set it to default
+            if !reg_match
+              @endian_precedence = [:middle, :little]
+            elsif reg_match[1].to_i > 12
+              @endian_precedence = [:little]
+            elsif reg_match[2].to_i > 12
+              @endian_precedence = [:middle]
+            end
+          end
+        end
+        self.push( value )
+      end
+      # if endian_precedence still nil, raise error
+      unless @endian_precedence || options[:date_format]
+        raise( "Unable to determine date format. Please specify using --date-format" )
+      end
+    end
+    def for( index )
+      value = self.at( index )
+      guess = Chronic.parse(value, :context => :past,
+                            :endian_precedence => @endian_precedence )
+      if guess.to_i < 953236800 && value =~ /\//
+        guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
+                              :endian_precedence => @endian_precedence)
+      end
+      guess && guess.to_date
+    end
+    def pretty_for(index)
+      date = self.for(index)
+      return "" if date.nil?
+      date.iso8601
+    end
+  end
+end

data/lib/reckon/ledger_parser.rb CHANGED

@@ -1,4 +1,109 @@
 #!/usr/bin/env ruby
+# frozen_string_literal: true
+# From: https://www.ledger-cli.org/3.0/doc/ledger3.html#Transactions-and-Comments
+#
+# The ledger file format is quite simple, but also very flexible. It supports many
+# options, though typically the user can ignore most of them. They are summarized below.
+#
+# The initial character of each line determines what the line means, and how it should
+# be interpreted. Allowable initial characters are:
+#
+# NUMBER
+#     A line beginning with a number denotes an entry. It may be followed by any
+#     number of lines, each beginning with whitespace, to denote the entry's account
+#     transactions. The format of the first line is:
+#
+#     DATE[=EDATE] [*|!] [(CODE)] DESC
+#
+#     If '*' appears after the date (with optional effective date), it indicates the
+#     entry is "cleared", which can mean whatever the user wants it to mean. If '!'
+#     appears after the date, it indicates the entry is "pending"; i.e., tentatively
+#     cleared from the user's point of view, but not yet actually cleared. If a 'CODE'
+#     appears in parentheses, it may be used to indicate a check number, or the type of
+#     the transaction. Following these is the payee, or a description of the
+#     transaction.
+#
+#     The format of each following transaction is:
+#
+#       ACCOUNT  AMOUNT  [; NOTE]
+#
+#     The 'ACCOUNT' may be surrounded by parentheses if it is a virtual transaction, or
+#     square brackets if it is a virtual transaction that must balance. The 'AMOUNT'
+#     can be followed by a per-unit transaction cost, by specifying '@ AMOUNT', or a
+#     complete transaction cost with '@@ AMOUNT'. Lastly, the 'NOTE' may specify an
+#     actual and/or effective date for the transaction by using the syntax
+#     '[ACTUAL_DATE]' or '[=EFFECTIVE_DATE]' or '[ACTUAL_DATE=EFFECTIVE_DATE]'.
+# =
+#     An automated entry. A value expression must appear after the equal sign.
+#
+#     After this initial line there should be a set of one or more transactions, just as
+#     if it were a normal entry. If the amounts of the transactions have no commodity,
+#     they will be applied as modifiers to whichever real transaction is matched by the
+#     value expression.
+# ~
+#     A period entry. A period expression must appear after the tilde.
+#
+#     After this initial line there should be a set of one or more transactions, just as
+#     if it were a normal entry.
+# !
+#     A line beginning with an exclamation mark denotes a command directive. It must be
+#     immediately followed by the command word. The supported commands are:
+#
+#     '!include'
+#         Include the stated ledger file.
+#
+#     '!account'
+#         The account name given is taken to be the parent of all transactions that
+#         follow, until '!end' is seen.
+#
+#     '!end'
+#         Ends an account block.
+#
+# ;
+#     A line beginning with a semicolon indicates a comment, and is ignored.
+# Y
+#     If a line begins with a capital Y, it denotes the year used for all subsequent
+#     entries that give a date without a year. The year should appear immediately after
+#     the Y, for example: 'Y2004'. This is useful at the beginning of a file, to specify
+#     the year for that file. If all entries specify a year, however, this command has
+#     no effect.
+#
+# P
+#     Specifies a historical price for a commodity. These are usually found in a pricing
+#     history file (see the -Q option). The syntax is:
+#
+#     P DATE SYMBOL PRICE
+#
+# N SYMBOL
+#     Indicates that pricing information is to be ignored for a given symbol, nor will
+#     quotes ever be downloaded for that symbol. Useful with a home currency, such as
+#     the dollar ($). It is recommended that these pricing options be set in the price
+#     database file, which defaults to ~/.pricedb. The syntax for this command is:
+#
+#     N SYMBOL
+#
+# D AMOUNT
+#     Specifies the default commodity to use, by specifying an amount in the expected
+#     format. The entry command will use this commodity as the default when none other
+#     can be determined. This command may be used multiple times, to set the default
+#     flags for different commodities; whichever is seen last is used as the default
+#     commodity. For example, to set US dollars as the default commodity, while also
+#     setting the thousands flag and decimal flag for that commodity, use:
+#
+#     D $1,000.00
+#
+# C AMOUNT1 = AMOUNT2
+#     Specifies a commodity conversion, where the first amount is given to be equivalent
+#     to the second amount. The first amount should use the decimal precision desired
+#     during reporting:
+#
+#     C 1.00 Kb = 1024 bytes
+#
+# i, o, b, h
+#     These four relate to timeclock support, which permits ledger to read timelog
+#     files. See the timeclock's documentation for more info on the syntax of its
+#     timelog files.
 require 'rubygems'
@@ -8,54 +113,123 @@ module Reckon
     attr_accessor :entries
     def initialize(ledger, options = {})
-      @entries = []
+      @options = options
+      @date_format = options[:date_format] || '%Y-%m-%d'
       parse(ledger)
     end
     def parse(ledger)
       @entries = []
-      date = desc = nil
-      accounts = []
+      new_entry = {}
+      in_comment = false
+      comment_chars = ';#%*|'
       ledger.strip.split("\n").each do |entry|
-        next if entry =~ /^\s*$/ || entry =~ /^[^ \t\d]/
-        if entry =~ /^([\d\/-]+)(\=[\d\/-]+)?(\s+[\*!]?\s*.*?)$/
-          @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
-          date = $1
-          desc = $3
-          accounts = []
-        elsif date && entry =~ /^\s+([a-z\s:_\-]+)(\s*$|(\s+[\$\.,\-\d\+]+)($|\s+($|[^\$\.,\-\d\+])))/i
-          accounts << { :name => $1.strip, :amount => clean_money($3) }
+        # strip comment lines
+        in_comment = true if entry == 'comment'
+        in_comment = false if entry == 'end comment'
+        next if in_comment
+        next if entry =~ /^\s*[#{comment_chars}]/
+        # (date, type, code, description), type and code are optional
+        if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
+          add_entry(new_entry)
+          new_entry = {
+            date: try_parse_date(m[1]),
+            type: m[2] || "",
+            code: m[3] && m[3].tr('()', '') || "",
+            desc: m[4].strip,
+            accounts: []
+          }
+        elsif entry =~ /^\s*$/ && new_entry[:date]
+          add_entry(new_entry)
+          new_entry = {}
+        elsif new_entry[:date] && entry =~ /^\s+/
+          LOGGER.info("Adding new account #{entry}")
+          new_entry[:accounts] << parse_account_line(entry)
         else
-          @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
-          date = desc = nil
-          accounts = []
+          LOGGER.info("Unknown entry type: #{entry}")
+          add_entry(new_entry)
+          new_entry = {}
         end
       end
-      @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
+      add_entry(new_entry)
     end
-    def balance(accounts)
-      if accounts.any? { |i| i[:amount].nil? }
-        sum = accounts.inject(0) {|m, account| m + (account[:amount] || 0) }
-        count = 0
-        accounts.each do |account|
-          if account[:amount].nil?
-            count += 1
-            account[:amount] = 0 - sum
-          end
-        end
-        if count > 1
-          puts "Warning: unparsable entry due to more than one missing money value."
-          p accounts
-          puts
+    # roughly matches ledger csv format
+    def to_csv
+      return @entries.flat_map do |n|
+        n[:accounts].map do |a|
+          row = [
+            n[:date].strftime(@date_format),
+            n[:code],
+            n[:desc],
+            a[:name],
+            "", # currency (not implemented)
+            a[:amount],
+            n[:type],
+            "", # account comment (not implemented)
+          ]
+          CSV.generate_line(row).strip
         end
       end
+    end
+    private
+    def add_entry(entry)
+      return unless entry[:date] && entry[:accounts].length > 1
+      entry[:accounts] = balance(entry[:accounts])
+      @entries << entry
+    end
+    def try_parse_date(date_str)
+      date = Date.parse(date_str)
+      return nil if date.year > 9999 || date.year < 1000
+      date
+    rescue ArgumentError
+      nil
+    end
+    def parse_account_line(entry)
+      (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
+      return {
+        name: account_name,
+        amount: clean_money("")
+      } if rest.nil? || rest.empty?
+      (value, _comment) = rest.split(/;/)
+      return {
+        name: account_name,
+        amount: clean_money(value || "")
+      }
+    end
+    def balance(accounts)
+      return accounts unless accounts.any? { |i| i[:amount].nil? }
+      sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
+      count = 0
+      accounts.each do |account|
+        next unless account[:amount].nil?
+        count += 1
+        account[:amount] = -sum
+      end
+      if count > 1
+        puts "Warning: unparsable entry due to more than one missing money value."
+        p accounts
+        puts
+      end
       accounts
     end
     def clean_money(money)
-      return nil if money.nil? || money.length == 0
+      return nil if money.nil? || money.empty?
       money.gsub(/[^0-9.-]/, '').to_f
     end
   end

data/lib/reckon/logger.rb ADDED

@@ -0,0 +1,4 @@
+module Reckon
+  LOGGER = Logger.new(STDERR)
+  LOGGER.level = Logger::WARN
+end

data/lib/reckon/money.rb CHANGED

@@ -55,9 +55,9 @@ module Reckon
       any_number_regex = /^(.*?)([\d\.]+)/
       # Prefer matching the money_format, match any number otherwise
-      m = value.match( money_format_regex ) ||
+      m = value.match( money_format_regex ) ||
         value.match( any_number_regex )
-      if m
+      if m
         amount = m[2].to_f
         # Check whether the money had a - or (, which indicates negative amounts
         if (m[1].match( /^[\(-]/ ) || m[1].match( /-$/  ))
@@ -71,12 +71,13 @@ module Reckon
     def Money::likelihood( entry )
       money_score = 0
-      money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
+      # digits separated by , or . with no more than 2 trailing digits
+      money_score += 40 if entry.match(/\d+[,.]\d{2}[^\d]*$/)
       money_score += 10 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
       money_score += 10 if entry[/\d+[\.,\d]*?[\.,]\d\d$/]
       money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
-      money_score -= entry.length if entry.length > 8
-      money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
+      money_score -= entry.length if entry.length > 12
+      money_score -= 20 if (entry !~ /^[\$\+\.\-,\d\(\)]+$/) && entry.length > 0
       money_score
     end
   end
@@ -112,61 +113,4 @@ module Reckon
       self
     end
   end
-  class DateColumn < Array
-    attr_accessor :endian_precedence
-    def initialize( arr = [], options = {} )
-      arr.each do |value|
-        if options[:date_format]
-          begin
-            value = Date.strptime(value, options[:date_format])
-          rescue
-            puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
-            exit 1
-          end
-        else
-          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
-          value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/            # german format
-          value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/            # nordea format
-          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/            # yyyy-mm-dd format
-          value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/                 # yyyymmdd format
-          unless @endian_precedence # Try to detect endian_precedence
-            reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
-            # If first one is not \d\d/\d\d/\d\d\d?\d set it to default
-            if !reg_match
-              @endian_precedence = [:middle, :little]
-            elsif reg_match[1].to_i > 12
-              @endian_precedence = [:little]
-            elsif reg_match[2].to_i > 12
-              @endian_precedence = [:middle]
-            end
-          end
-        end
-        self.push( value )
-      end
-      # if endian_precedence still nil, raise error
-      unless @endian_precedence || options[:date_format]
-        raise( "Unable to determine date format. Please specify using --date-format" )
-      end
-    end
-    def for( index )
-      value = self.at( index )
-      guess = Chronic.parse(value, :context => :past,
-                            :endian_precedence => @endian_precedence )
-      if guess.to_i < 953236800 && value =~ /\//
-        guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
-                              :endian_precedence => @endian_precedence)
-      end
-      guess
-    end
-    def pretty_for(index)
-      self.for(index).strftime("%Y/%m/%d")
-    end
-  end
 end

data/lib/reckon/version.rb ADDED

@@ -0,0 +1,3 @@
+module Reckon
+  VERSION = "0.5.4"
+end

data/reckon.gemspec CHANGED

@@ -1,14 +1,15 @@
-# -*- encoding: utf-8 -*-
 $:.push File.expand_path("../lib", __FILE__)
+require_relative 'lib/reckon/version'
 Gem::Specification.new do |s|
   s.name = %q{reckon}
-  s.version = "0.4.4"
-  s.authors = ["Andrew Cantino", "BlackEdder"]
+  s.version = Reckon::VERSION
+  s.authors = ["Andrew Cantino", "BlackEdder", "Ben Prew"]
   s.email = %q{andrew@iterationlabs.com}
   s.homepage = %q{https://github.com/cantino/reckon}
   s.description = %q{Reckon automagically converts CSV files for use with the command-line accounting tool Ledger.  It also helps you to select the correct accounts associated with the CSV data using Bayesian machine learning.}
   s.summary = %q{Utility for interactively converting and labeling CSV files for the Ledger accounting tool.}
+  s.licenses = ['MIT']
   s.files         = `git ls-files`.split("\n")
   s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -16,9 +17,11 @@ Gem::Specification.new do |s|
   s.require_paths = ["lib"]
   s.add_development_dependency "rspec", ">= 1.2.9"
-  s.add_runtime_dependency "fastercsv", ">= 1.5.1"
+  s.add_development_dependency "pry", ">= 0.12.2"
+  s.add_development_dependency "rantly", "= 1.2.0"
+  s.add_development_dependency "github_changelog_generator"
   s.add_runtime_dependency "chronic", ">= 0.3.0"
   s.add_runtime_dependency "highline", ">= 1.5.2"
   s.add_runtime_dependency "terminal-table", ">= 1.4.2"
+  s.add_runtime_dependency "rchardet", ">= 1.8.0"
 end