reckon 0.4.4 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.ruby-version +1 -1
  4. data/.travis.yml +10 -2
  5. data/CHANGELOG.md +235 -0
  6. data/Gemfile +0 -1
  7. data/Gemfile.lock +73 -15
  8. data/README.md +12 -5
  9. data/lib/reckon.rb +13 -12
  10. data/lib/reckon/app.rb +94 -116
  11. data/lib/reckon/cosine_similarity.rb +122 -0
  12. data/lib/reckon/csv_parser.rb +116 -129
  13. data/lib/reckon/date_column.rb +60 -0
  14. data/lib/reckon/ledger_parser.rb +204 -30
  15. data/lib/reckon/logger.rb +4 -0
  16. data/lib/reckon/money.rb +6 -62
  17. data/lib/reckon/version.rb +3 -0
  18. data/reckon.gemspec +8 -5
  19. data/spec/data_fixtures/51-sample.csv +8 -0
  20. data/spec/data_fixtures/51-tokens.yml +9 -0
  21. data/spec/data_fixtures/73-sample.csv +2 -0
  22. data/spec/data_fixtures/73-tokens.yml +8 -0
  23. data/spec/data_fixtures/73-transactions.ledger +7 -0
  24. data/spec/data_fixtures/85-date-example.csv +2 -0
  25. data/spec/data_fixtures/austrian_example.csv +13 -0
  26. data/spec/data_fixtures/bom_utf8_file.csv +1 -0
  27. data/spec/data_fixtures/broker_canada_example.csv +12 -0
  28. data/spec/data_fixtures/chase.csv +9 -0
  29. data/spec/data_fixtures/danish_kroner_nordea_example.csv +6 -0
  30. data/spec/data_fixtures/english_date_example.csv +3 -0
  31. data/spec/data_fixtures/french_example.csv +9 -0
  32. data/spec/data_fixtures/german_date_example.csv +3 -0
  33. data/spec/data_fixtures/harder_date_example.csv +5 -0
  34. data/spec/data_fixtures/ing.csv +3 -0
  35. data/spec/data_fixtures/intuit_mint_example.csv +7 -0
  36. data/spec/data_fixtures/invalid_header_example.csv +6 -0
  37. data/spec/data_fixtures/inversed_credit_card.csv +16 -0
  38. data/spec/data_fixtures/nationwide.csv +4 -0
  39. data/spec/data_fixtures/simple.csv +2 -0
  40. data/spec/data_fixtures/some_other.csv +9 -0
  41. data/spec/data_fixtures/spanish_date_example.csv +3 -0
  42. data/spec/data_fixtures/suntrust.csv +7 -0
  43. data/spec/data_fixtures/test_money_column.csv +3 -0
  44. data/spec/data_fixtures/two_money_columns.csv +5 -0
  45. data/spec/data_fixtures/yyyymmdd_date_example.csv +1 -0
  46. data/spec/reckon/app_spec.rb +96 -34
  47. data/spec/reckon/csv_parser_spec.rb +185 -307
  48. data/spec/reckon/date_column_spec.rb +12 -13
  49. data/spec/reckon/ledger_parser_spec.rb +99 -9
  50. data/spec/reckon/money_spec.rb +42 -29
  51. data/spec/spec_helper.rb +22 -0
  52. metadata +85 -21
  53. data/CHANGES.md +0 -9
@@ -0,0 +1,60 @@
1
+ module Reckon
2
+ class DateColumn < Array
3
+ attr_accessor :endian_precedence
4
+ def initialize( arr = [], options = {} )
5
+ arr.each do |value|
6
+ if options[:date_format]
7
+ begin
8
+ value = Date.strptime(value, options[:date_format])
9
+ rescue
10
+ puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
11
+ exit 1
12
+ end
13
+ else
14
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
15
+ value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
16
+ value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
17
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/ # yyyy-mm-dd format
18
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
19
+
20
+
21
+ unless @endian_precedence # Try to detect endian_precedence
22
+ reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
23
+ # If first one is not \d\d/\d\d/\d\d\d?\d set it to default
24
+ if !reg_match
25
+ @endian_precedence = [:middle, :little]
26
+ elsif reg_match[1].to_i > 12
27
+ @endian_precedence = [:little]
28
+ elsif reg_match[2].to_i > 12
29
+ @endian_precedence = [:middle]
30
+ end
31
+ end
32
+ end
33
+ self.push( value )
34
+ end
35
+ # if endian_precedence still nil, raise error
36
+ unless @endian_precedence || options[:date_format]
37
+ raise( "Unable to determine date format. Please specify using --date-format" )
38
+ end
39
+ end
40
+
41
+ def for( index )
42
+ value = self.at( index )
43
+ guess = Chronic.parse(value, :context => :past,
44
+ :endian_precedence => @endian_precedence )
45
+ if guess.to_i < 953236800 && value =~ /\//
46
+ guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
47
+ :endian_precedence => @endian_precedence)
48
+ end
49
+ guess && guess.to_date
50
+ end
51
+
52
+ def pretty_for(index)
53
+ date = self.for(index)
54
+ return "" if date.nil?
55
+
56
+ date.iso8601
57
+ end
58
+
59
+ end
60
+ end
@@ -1,4 +1,109 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # From: https://www.ledger-cli.org/3.0/doc/ledger3.html#Transactions-and-Comments
5
+ #
6
+ # The ledger file format is quite simple, but also very flexible. It supports many
7
+ # options, though typically the user can ignore most of them. They are summarized below.
8
+ #
9
+ # The initial character of each line determines what the line means, and how it should
10
+ # be interpreted. Allowable initial characters are:
11
+ #
12
+ # NUMBER
13
+ # A line beginning with a number denotes an entry. It may be followed by any
14
+ # number of lines, each beginning with whitespace, to denote the entry's account
15
+ # transactions. The format of the first line is:
16
+ #
17
+ # DATE[=EDATE] [*|!] [(CODE)] DESC
18
+ #
19
+ # If '*' appears after the date (with optional effective date), it indicates the
20
+ # entry is "cleared", which can mean whatever the user wants it to mean. If '!'
21
+ # appears after the date, it indicates the entry is "pending"; i.e., tentatively
22
+ # cleared from the user's point of view, but not yet actually cleared. If a 'CODE'
23
+ # appears in parentheses, it may be used to indicate a check number, or the type of
24
+ # the transaction. Following these is the payee, or a description of the
25
+ # transaction.
26
+ #
27
+ # The format of each following transaction is:
28
+ #
29
+ # ACCOUNT AMOUNT [; NOTE]
30
+ #
31
+ # The 'ACCOUNT' may be surrounded by parentheses if it is a virtual transaction, or
32
+ # square brackets if it is a virtual transaction that must balance. The 'AMOUNT'
33
+ # can be followed by a per-unit transaction cost, by specifying '@ AMOUNT', or a
34
+ # complete transaction cost with '@@ AMOUNT'. Lastly, the 'NOTE' may specify an
35
+ # actual and/or effective date for the transaction by using the syntax
36
+ # '[ACTUAL_DATE]' or '[=EFFECTIVE_DATE]' or '[ACTUAL_DATE=EFFECTIVE_DATE]'.
37
+ # =
38
+ # An automated entry. A value expression must appear after the equal sign.
39
+ #
40
+ # After this initial line there should be a set of one or more transactions, just as
41
+ # if it were a normal entry. If the amounts of the transactions have no commodity,
42
+ # they will be applied as modifiers to whichever real transaction is matched by the
43
+ # value expression.
44
+ # ~
45
+ # A period entry. A period expression must appear after the tilde.
46
+ #
47
+ # After this initial line there should be a set of one or more transactions, just as
48
+ # if it were a normal entry.
49
+ # !
50
+ # A line beginning with an exclamation mark denotes a command directive. It must be
51
+ # immediately followed by the command word. The supported commands are:
52
+ #
53
+ # '!include'
54
+ # Include the stated ledger file.
55
+ #
56
+ # '!account'
57
+ # The account name given is taken to be the parent of all transactions that
58
+ # follow, until '!end' is seen.
59
+ #
60
+ # '!end'
61
+ # Ends an account block.
62
+ #
63
+ # ;
64
+ # A line beginning with a semicolon indicates a comment, and is ignored.
65
+ # Y
66
+ # If a line begins with a capital Y, it denotes the year used for all subsequent
67
+ # entries that give a date without a year. The year should appear immediately after
68
+ # the Y, for example: 'Y2004'. This is useful at the beginning of a file, to specify
69
+ # the year for that file. If all entries specify a year, however, this command has
70
+ # no effect.
71
+ #
72
+ # P
73
+ # Specifies a historical price for a commodity. These are usually found in a pricing
74
+ # history file (see the -Q option). The syntax is:
75
+ #
76
+ # P DATE SYMBOL PRICE
77
+ #
78
+ # N SYMBOL
79
+ # Indicates that pricing information is to be ignored for a given symbol, nor will
80
+ # quotes ever be downloaded for that symbol. Useful with a home currency, such as
81
+ # the dollar ($). It is recommended that these pricing options be set in the price
82
+ # database file, which defaults to ~/.pricedb. The syntax for this command is:
83
+ #
84
+ # N SYMBOL
85
+ #
86
+ # D AMOUNT
87
+ # Specifies the default commodity to use, by specifying an amount in the expected
88
+ # format. The entry command will use this commodity as the default when none other
89
+ # can be determined. This command may be used multiple times, to set the default
90
+ # flags for different commodities; whichever is seen last is used as the default
91
+ # commodity. For example, to set US dollars as the default commodity, while also
92
+ # setting the thousands flag and decimal flag for that commodity, use:
93
+ #
94
+ # D $1,000.00
95
+ #
96
+ # C AMOUNT1 = AMOUNT2
97
+ # Specifies a commodity conversion, where the first amount is given to be equivalent
98
+ # to the second amount. The first amount should use the decimal precision desired
99
+ # during reporting:
100
+ #
101
+ # C 1.00 Kb = 1024 bytes
102
+ #
103
+ # i, o, b, h
104
+ # These four relate to timeclock support, which permits ledger to read timelog
105
+ # files. See the timeclock's documentation for more info on the syntax of its
106
+ # timelog files.
2
107
 
3
108
  require 'rubygems'
4
109
 
@@ -8,54 +113,123 @@ module Reckon
8
113
  attr_accessor :entries
9
114
 
10
115
  def initialize(ledger, options = {})
11
- @entries = []
116
+ @options = options
117
+ @date_format = options[:date_format] || '%Y-%m-%d'
12
118
  parse(ledger)
13
119
  end
14
120
 
15
121
  def parse(ledger)
16
122
  @entries = []
17
- date = desc = nil
18
- accounts = []
123
+ new_entry = {}
124
+ in_comment = false
125
+ comment_chars = ';#%*|'
19
126
  ledger.strip.split("\n").each do |entry|
20
- next if entry =~ /^\s*$/ || entry =~ /^[^ \t\d]/
21
- if entry =~ /^([\d\/-]+)(\=[\d\/-]+)?(\s+[\*!]?\s*.*?)$/
22
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
23
- date = $1
24
- desc = $3
25
- accounts = []
26
- elsif date && entry =~ /^\s+([a-z\s:_\-]+)(\s*$|(\s+[\$\.,\-\d\+]+)($|\s+($|[^\$\.,\-\d\+])))/i
27
- accounts << { :name => $1.strip, :amount => clean_money($3) }
127
+ # strip comment lines
128
+ in_comment = true if entry == 'comment'
129
+ in_comment = false if entry == 'end comment'
130
+ next if in_comment
131
+ next if entry =~ /^\s*[#{comment_chars}]/
132
+
133
+ # (date, type, code, description), type and code are optional
134
+ if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
135
+ add_entry(new_entry)
136
+ new_entry = {
137
+ date: try_parse_date(m[1]),
138
+ type: m[2] || "",
139
+ code: m[3] && m[3].tr('()', '') || "",
140
+ desc: m[4].strip,
141
+ accounts: []
142
+ }
143
+ elsif entry =~ /^\s*$/ && new_entry[:date]
144
+ add_entry(new_entry)
145
+ new_entry = {}
146
+ elsif new_entry[:date] && entry =~ /^\s+/
147
+ LOGGER.info("Adding new account #{entry}")
148
+ new_entry[:accounts] << parse_account_line(entry)
28
149
  else
29
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
30
- date = desc = nil
31
- accounts = []
150
+ LOGGER.info("Unknown entry type: #{entry}")
151
+ add_entry(new_entry)
152
+ new_entry = {}
32
153
  end
33
154
  end
34
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
155
+ add_entry(new_entry)
35
156
  end
36
157
 
37
- def balance(accounts)
38
- if accounts.any? { |i| i[:amount].nil? }
39
- sum = accounts.inject(0) {|m, account| m + (account[:amount] || 0) }
40
- count = 0
41
- accounts.each do |account|
42
- if account[:amount].nil?
43
- count += 1
44
- account[:amount] = 0 - sum
45
- end
46
- end
47
- if count > 1
48
- puts "Warning: unparsable entry due to more than one missing money value."
49
- p accounts
50
- puts
158
+ # roughly matches ledger csv format
159
+ def to_csv
160
+ return @entries.flat_map do |n|
161
+ n[:accounts].map do |a|
162
+ row = [
163
+ n[:date].strftime(@date_format),
164
+ n[:code],
165
+ n[:desc],
166
+ a[:name],
167
+ "", # currency (not implemented)
168
+ a[:amount],
169
+ n[:type],
170
+ "", # account comment (not implemented)
171
+ ]
172
+ CSV.generate_line(row).strip
51
173
  end
52
174
  end
175
+ end
176
+
177
+ private
178
+
179
+ def add_entry(entry)
180
+ return unless entry[:date] && entry[:accounts].length > 1
181
+
182
+ entry[:accounts] = balance(entry[:accounts])
183
+ @entries << entry
184
+ end
185
+
186
+ def try_parse_date(date_str)
187
+ date = Date.parse(date_str)
188
+ return nil if date.year > 9999 || date.year < 1000
189
+
190
+ date
191
+ rescue ArgumentError
192
+ nil
193
+ end
194
+
195
+ def parse_account_line(entry)
196
+ (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
197
+
198
+ return {
199
+ name: account_name,
200
+ amount: clean_money("")
201
+ } if rest.nil? || rest.empty?
202
+
203
+ (value, _comment) = rest.split(/;/)
204
+ return {
205
+ name: account_name,
206
+ amount: clean_money(value || "")
207
+ }
208
+ end
209
+
210
+ def balance(accounts)
211
+ return accounts unless accounts.any? { |i| i[:amount].nil? }
212
+
213
+ sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
214
+ count = 0
215
+ accounts.each do |account|
216
+ next unless account[:amount].nil?
217
+
218
+ count += 1
219
+ account[:amount] = -sum
220
+ end
221
+ if count > 1
222
+ puts "Warning: unparsable entry due to more than one missing money value."
223
+ p accounts
224
+ puts
225
+ end
53
226
 
54
227
  accounts
55
228
  end
56
229
 
57
230
  def clean_money(money)
58
- return nil if money.nil? || money.length == 0
231
+ return nil if money.nil? || money.empty?
232
+
59
233
  money.gsub(/[^0-9.-]/, '').to_f
60
234
  end
61
235
  end
@@ -0,0 +1,4 @@
1
+ module Reckon
2
+ LOGGER = Logger.new(STDERR)
3
+ LOGGER.level = Logger::WARN
4
+ end
@@ -55,9 +55,9 @@ module Reckon
55
55
  any_number_regex = /^(.*?)([\d\.]+)/
56
56
 
57
57
  # Prefer matching the money_format, match any number otherwise
58
- m = value.match( money_format_regex ) ||
58
+ m = value.match( money_format_regex ) ||
59
59
  value.match( any_number_regex )
60
- if m
60
+ if m
61
61
  amount = m[2].to_f
62
62
  # Check whether the money had a - or (, which indicates negative amounts
63
63
  if (m[1].match( /^[\(-]/ ) || m[1].match( /-$/ ))
@@ -71,12 +71,13 @@ module Reckon
71
71
 
72
72
  def Money::likelihood( entry )
73
73
  money_score = 0
74
- money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
74
+ # digits separated by , or . with no more than 2 trailing digits
75
+ money_score += 40 if entry.match(/\d+[,.]\d{2}[^\d]*$/)
75
76
  money_score += 10 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
76
77
  money_score += 10 if entry[/\d+[\.,\d]*?[\.,]\d\d$/]
77
78
  money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
78
- money_score -= entry.length if entry.length > 8
79
- money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
79
+ money_score -= entry.length if entry.length > 12
80
+ money_score -= 20 if (entry !~ /^[\$\+\.\-,\d\(\)]+$/) && entry.length > 0
80
81
  money_score
81
82
  end
82
83
  end
@@ -112,61 +113,4 @@ module Reckon
112
113
  self
113
114
  end
114
115
  end
115
-
116
- class DateColumn < Array
117
- attr_accessor :endian_precedence
118
- def initialize( arr = [], options = {} )
119
- arr.each do |value|
120
- if options[:date_format]
121
- begin
122
- value = Date.strptime(value, options[:date_format])
123
- rescue
124
- puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
125
- exit 1
126
- end
127
- else
128
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
129
- value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
130
- value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
131
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/ # yyyy-mm-dd format
132
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
133
-
134
-
135
- unless @endian_precedence # Try to detect endian_precedence
136
- reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
137
- # If first one is not \d\d/\d\d/\d\d\d?\d set it to default
138
- if !reg_match
139
- @endian_precedence = [:middle, :little]
140
- elsif reg_match[1].to_i > 12
141
- @endian_precedence = [:little]
142
- elsif reg_match[2].to_i > 12
143
- @endian_precedence = [:middle]
144
- end
145
- end
146
- end
147
- self.push( value )
148
- end
149
- # if endian_precedence still nil, raise error
150
- unless @endian_precedence || options[:date_format]
151
- raise( "Unable to determine date format. Please specify using --date-format" )
152
- end
153
- end
154
-
155
- def for( index )
156
- value = self.at( index )
157
- guess = Chronic.parse(value, :context => :past,
158
- :endian_precedence => @endian_precedence )
159
- if guess.to_i < 953236800 && value =~ /\//
160
- guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
161
- :endian_precedence => @endian_precedence)
162
- end
163
- guess
164
- end
165
-
166
- def pretty_for(index)
167
- self.for(index).strftime("%Y/%m/%d")
168
- end
169
-
170
- end
171
116
  end
172
-
@@ -0,0 +1,3 @@
1
+ module Reckon
2
+ VERSION = "0.5.4"
3
+ end
@@ -1,14 +1,15 @@
1
- # -*- encoding: utf-8 -*-
2
1
  $:.push File.expand_path("../lib", __FILE__)
2
+ require_relative 'lib/reckon/version'
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = %q{reckon}
6
- s.version = "0.4.4"
7
- s.authors = ["Andrew Cantino", "BlackEdder"]
6
+ s.version = Reckon::VERSION
7
+ s.authors = ["Andrew Cantino", "BlackEdder", "Ben Prew"]
8
8
  s.email = %q{andrew@iterationlabs.com}
9
9
  s.homepage = %q{https://github.com/cantino/reckon}
10
10
  s.description = %q{Reckon automagically converts CSV files for use with the command-line accounting tool Ledger. It also helps you to select the correct accounts associated with the CSV data using Bayesian machine learning.}
11
11
  s.summary = %q{Utility for interactively converting and labeling CSV files for the Ledger accounting tool.}
12
+ s.licenses = ['MIT']
12
13
 
13
14
  s.files = `git ls-files`.split("\n")
14
15
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -16,9 +17,11 @@ Gem::Specification.new do |s|
16
17
  s.require_paths = ["lib"]
17
18
 
18
19
  s.add_development_dependency "rspec", ">= 1.2.9"
19
- s.add_runtime_dependency "fastercsv", ">= 1.5.1"
20
+ s.add_development_dependency "pry", ">= 0.12.2"
21
+ s.add_development_dependency "rantly", "= 1.2.0"
22
+ s.add_development_dependency "github_changelog_generator"
20
23
  s.add_runtime_dependency "chronic", ">= 0.3.0"
21
24
  s.add_runtime_dependency "highline", ">= 1.5.2"
22
25
  s.add_runtime_dependency "terminal-table", ">= 1.4.2"
26
+ s.add_runtime_dependency "rchardet", ">= 1.8.0"
23
27
  end
24
-