reckon 0.4.4 → 0.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +3 -0
  3. data/.ruby-version +1 -1
  4. data/.travis.yml +10 -2
  5. data/CHANGELOG.md +235 -0
  6. data/Gemfile +0 -1
  7. data/Gemfile.lock +73 -15
  8. data/README.md +12 -5
  9. data/lib/reckon.rb +13 -12
  10. data/lib/reckon/app.rb +94 -116
  11. data/lib/reckon/cosine_similarity.rb +122 -0
  12. data/lib/reckon/csv_parser.rb +116 -129
  13. data/lib/reckon/date_column.rb +60 -0
  14. data/lib/reckon/ledger_parser.rb +204 -30
  15. data/lib/reckon/logger.rb +4 -0
  16. data/lib/reckon/money.rb +6 -62
  17. data/lib/reckon/version.rb +3 -0
  18. data/reckon.gemspec +8 -5
  19. data/spec/data_fixtures/51-sample.csv +8 -0
  20. data/spec/data_fixtures/51-tokens.yml +9 -0
  21. data/spec/data_fixtures/73-sample.csv +2 -0
  22. data/spec/data_fixtures/73-tokens.yml +8 -0
  23. data/spec/data_fixtures/73-transactions.ledger +7 -0
  24. data/spec/data_fixtures/85-date-example.csv +2 -0
  25. data/spec/data_fixtures/austrian_example.csv +13 -0
  26. data/spec/data_fixtures/bom_utf8_file.csv +1 -0
  27. data/spec/data_fixtures/broker_canada_example.csv +12 -0
  28. data/spec/data_fixtures/chase.csv +9 -0
  29. data/spec/data_fixtures/danish_kroner_nordea_example.csv +6 -0
  30. data/spec/data_fixtures/english_date_example.csv +3 -0
  31. data/spec/data_fixtures/french_example.csv +9 -0
  32. data/spec/data_fixtures/german_date_example.csv +3 -0
  33. data/spec/data_fixtures/harder_date_example.csv +5 -0
  34. data/spec/data_fixtures/ing.csv +3 -0
  35. data/spec/data_fixtures/intuit_mint_example.csv +7 -0
  36. data/spec/data_fixtures/invalid_header_example.csv +6 -0
  37. data/spec/data_fixtures/inversed_credit_card.csv +16 -0
  38. data/spec/data_fixtures/nationwide.csv +4 -0
  39. data/spec/data_fixtures/simple.csv +2 -0
  40. data/spec/data_fixtures/some_other.csv +9 -0
  41. data/spec/data_fixtures/spanish_date_example.csv +3 -0
  42. data/spec/data_fixtures/suntrust.csv +7 -0
  43. data/spec/data_fixtures/test_money_column.csv +3 -0
  44. data/spec/data_fixtures/two_money_columns.csv +5 -0
  45. data/spec/data_fixtures/yyyymmdd_date_example.csv +1 -0
  46. data/spec/reckon/app_spec.rb +96 -34
  47. data/spec/reckon/csv_parser_spec.rb +185 -307
  48. data/spec/reckon/date_column_spec.rb +12 -13
  49. data/spec/reckon/ledger_parser_spec.rb +99 -9
  50. data/spec/reckon/money_spec.rb +42 -29
  51. data/spec/spec_helper.rb +22 -0
  52. metadata +85 -21
  53. data/CHANGES.md +0 -9
@@ -0,0 +1,60 @@
1
+ module Reckon
2
+ class DateColumn < Array
3
+ attr_accessor :endian_precedence
4
+ def initialize( arr = [], options = {} )
5
+ arr.each do |value|
6
+ if options[:date_format]
7
+ begin
8
+ value = Date.strptime(value, options[:date_format])
9
+ rescue
10
+ puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
11
+ exit 1
12
+ end
13
+ else
14
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
15
+ value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
16
+ value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
17
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/ # yyyy-mm-dd format
18
+ value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
19
+
20
+
21
+ unless @endian_precedence # Try to detect endian_precedence
22
+ reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
23
+ # If the first value does not match \d\d/\d\d/\d\d\d?\d?, fall back to the default precedence
24
+ if !reg_match
25
+ @endian_precedence = [:middle, :little]
26
+ elsif reg_match[1].to_i > 12
27
+ @endian_precedence = [:little]
28
+ elsif reg_match[2].to_i > 12
29
+ @endian_precedence = [:middle]
30
+ end
31
+ end
32
+ end
33
+ self.push( value )
34
+ end
35
+ # if endian_precedence still nil, raise error
36
+ unless @endian_precedence || options[:date_format]
37
+ raise( "Unable to determine date format. Please specify using --date-format" )
38
+ end
39
+ end
40
+
41
+ def for( index )
42
+ value = self.at( index )
43
+ guess = Chronic.parse(value, :context => :past,
44
+ :endian_precedence => @endian_precedence )
45
+ if guess.to_i < 953236800 && value =~ /\//
46
+ guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
47
+ :endian_precedence => @endian_precedence)
48
+ end
49
+ guess && guess.to_date
50
+ end
51
+
52
+ def pretty_for(index)
53
+ date = self.for(index)
54
+ return "" if date.nil?
55
+
56
+ date.iso8601
57
+ end
58
+
59
+ end
60
+ end
@@ -1,4 +1,109 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # From: https://www.ledger-cli.org/3.0/doc/ledger3.html#Transactions-and-Comments
5
+ #
6
+ # The ledger file format is quite simple, but also very flexible. It supports many
7
+ # options, though typically the user can ignore most of them. They are summarized below.
8
+ #
9
+ # The initial character of each line determines what the line means, and how it should
10
+ # be interpreted. Allowable initial characters are:
11
+ #
12
+ # NUMBER
13
+ # A line beginning with a number denotes an entry. It may be followed by any
14
+ # number of lines, each beginning with whitespace, to denote the entry's account
15
+ # transactions. The format of the first line is:
16
+ #
17
+ # DATE[=EDATE] [*|!] [(CODE)] DESC
18
+ #
19
+ # If '*' appears after the date (with optional effective date), it indicates the
20
+ # entry is "cleared", which can mean whatever the user wants it to mean. If '!'
21
+ # appears after the date, it indicates the entry is "pending"; i.e., tentatively
22
+ # cleared from the user's point of view, but not yet actually cleared. If a 'CODE'
23
+ # appears in parentheses, it may be used to indicate a check number, or the type of
24
+ # the transaction. Following these is the payee, or a description of the
25
+ # transaction.
26
+ #
27
+ # The format of each following transaction is:
28
+ #
29
+ # ACCOUNT AMOUNT [; NOTE]
30
+ #
31
+ # The 'ACCOUNT' may be surrounded by parentheses if it is a virtual transaction, or
32
+ # square brackets if it is a virtual transaction that must balance. The 'AMOUNT'
33
+ # can be followed by a per-unit transaction cost, by specifying '@ AMOUNT', or a
34
+ # complete transaction cost with '@@ AMOUNT'. Lastly, the 'NOTE' may specify an
35
+ # actual and/or effective date for the transaction by using the syntax
36
+ # '[ACTUAL_DATE]' or '[=EFFECTIVE_DATE]' or '[ACTUAL_DATE=EFFECTIVE_DATE]'.
37
+ # =
38
+ # An automated entry. A value expression must appear after the equal sign.
39
+ #
40
+ # After this initial line there should be a set of one or more transactions, just as
41
+ # if it were a normal entry. If the amounts of the transactions have no commodity,
42
+ # they will be applied as modifiers to whichever real transaction is matched by the
43
+ # value expression.
44
+ # ~
45
+ # A period entry. A period expression must appear after the tilde.
46
+ #
47
+ # After this initial line there should be a set of one or more transactions, just as
48
+ # if it were a normal entry.
49
+ # !
50
+ # A line beginning with an exclamation mark denotes a command directive. It must be
51
+ # immediately followed by the command word. The supported commands are:
52
+ #
53
+ # '!include'
54
+ # Include the stated ledger file.
55
+ #
56
+ # '!account'
57
+ # The account name given is taken to be the parent of all transactions that
58
+ # follow, until '!end' is seen.
59
+ #
60
+ # '!end'
61
+ # Ends an account block.
62
+ #
63
+ # ;
64
+ # A line beginning with a semicolon indicates a comment, and is ignored.
65
+ # Y
66
+ # If a line begins with a capital Y, it denotes the year used for all subsequent
67
+ # entries that give a date without a year. The year should appear immediately after
68
+ # the Y, for example: 'Y2004'. This is useful at the beginning of a file, to specify
69
+ # the year for that file. If all entries specify a year, however, this command has
70
+ # no effect.
71
+ #
72
+ # P
73
+ # Specifies a historical price for a commodity. These are usually found in a pricing
74
+ # history file (see the -Q option). The syntax is:
75
+ #
76
+ # P DATE SYMBOL PRICE
77
+ #
78
+ # N SYMBOL
79
+ # Indicates that pricing information is to be ignored for a given symbol, nor will
80
+ # quotes ever be downloaded for that symbol. Useful with a home currency, such as
81
+ # the dollar ($). It is recommended that these pricing options be set in the price
82
+ # database file, which defaults to ~/.pricedb. The syntax for this command is:
83
+ #
84
+ # N SYMBOL
85
+ #
86
+ # D AMOUNT
87
+ # Specifies the default commodity to use, by specifying an amount in the expected
88
+ # format. The entry command will use this commodity as the default when none other
89
+ # can be determined. This command may be used multiple times, to set the default
90
+ # flags for different commodities; whichever is seen last is used as the default
91
+ # commodity. For example, to set US dollars as the default commodity, while also
92
+ # setting the thousands flag and decimal flag for that commodity, use:
93
+ #
94
+ # D $1,000.00
95
+ #
96
+ # C AMOUNT1 = AMOUNT2
97
+ # Specifies a commodity conversion, where the first amount is given to be equivalent
98
+ # to the second amount. The first amount should use the decimal precision desired
99
+ # during reporting:
100
+ #
101
+ # C 1.00 Kb = 1024 bytes
102
+ #
103
+ # i, o, b, h
104
+ # These four relate to timeclock support, which permits ledger to read timelog
105
+ # files. See the timeclock's documentation for more info on the syntax of its
106
+ # timelog files.
2
107
 
3
108
  require 'rubygems'
4
109
 
@@ -8,54 +113,123 @@ module Reckon
8
113
  attr_accessor :entries
9
114
 
10
115
  def initialize(ledger, options = {})
11
- @entries = []
116
+ @options = options
117
+ @date_format = options[:date_format] || '%Y-%m-%d'
12
118
  parse(ledger)
13
119
  end
14
120
 
15
121
  def parse(ledger)
16
122
  @entries = []
17
- date = desc = nil
18
- accounts = []
123
+ new_entry = {}
124
+ in_comment = false
125
+ comment_chars = ';#%*|'
19
126
  ledger.strip.split("\n").each do |entry|
20
- next if entry =~ /^\s*$/ || entry =~ /^[^ \t\d]/
21
- if entry =~ /^([\d\/-]+)(\=[\d\/-]+)?(\s+[\*!]?\s*.*?)$/
22
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
23
- date = $1
24
- desc = $3
25
- accounts = []
26
- elsif date && entry =~ /^\s+([a-z\s:_\-]+)(\s*$|(\s+[\$\.,\-\d\+]+)($|\s+($|[^\$\.,\-\d\+])))/i
27
- accounts << { :name => $1.strip, :amount => clean_money($3) }
127
+ # strip comment lines
128
+ in_comment = true if entry == 'comment'
129
+ in_comment = false if entry == 'end comment'
130
+ next if in_comment
131
+ next if entry =~ /^\s*[#{comment_chars}]/
132
+
133
+ # (date, type, code, description), type and code are optional
134
+ if (m = entry.match(%r{^(\d+[\d/-]+)\s+([*!])?\s*(\([^)]+\))?\s*(.*)$}))
135
+ add_entry(new_entry)
136
+ new_entry = {
137
+ date: try_parse_date(m[1]),
138
+ type: m[2] || "",
139
+ code: m[3] && m[3].tr('()', '') || "",
140
+ desc: m[4].strip,
141
+ accounts: []
142
+ }
143
+ elsif entry =~ /^\s*$/ && new_entry[:date]
144
+ add_entry(new_entry)
145
+ new_entry = {}
146
+ elsif new_entry[:date] && entry =~ /^\s+/
147
+ LOGGER.info("Adding new account #{entry}")
148
+ new_entry[:accounts] << parse_account_line(entry)
28
149
  else
29
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
30
- date = desc = nil
31
- accounts = []
150
+ LOGGER.info("Unknown entry type: #{entry}")
151
+ add_entry(new_entry)
152
+ new_entry = {}
32
153
  end
33
154
  end
34
- @entries << { :date => date.strip, :desc => desc.strip, :accounts => balance(accounts) } if date
155
+ add_entry(new_entry)
35
156
  end
36
157
 
37
- def balance(accounts)
38
- if accounts.any? { |i| i[:amount].nil? }
39
- sum = accounts.inject(0) {|m, account| m + (account[:amount] || 0) }
40
- count = 0
41
- accounts.each do |account|
42
- if account[:amount].nil?
43
- count += 1
44
- account[:amount] = 0 - sum
45
- end
46
- end
47
- if count > 1
48
- puts "Warning: unparsable entry due to more than one missing money value."
49
- p accounts
50
- puts
158
+ # roughly matches ledger csv format
159
+ def to_csv
160
+ return @entries.flat_map do |n|
161
+ n[:accounts].map do |a|
162
+ row = [
163
+ n[:date].strftime(@date_format),
164
+ n[:code],
165
+ n[:desc],
166
+ a[:name],
167
+ "", # currency (not implemented)
168
+ a[:amount],
169
+ n[:type],
170
+ "", # account comment (not implemented)
171
+ ]
172
+ CSV.generate_line(row).strip
51
173
  end
52
174
  end
175
+ end
176
+
177
+ private
178
+
179
+ def add_entry(entry)
180
+ return unless entry[:date] && entry[:accounts].length > 1
181
+
182
+ entry[:accounts] = balance(entry[:accounts])
183
+ @entries << entry
184
+ end
185
+
186
+ def try_parse_date(date_str)
187
+ date = Date.parse(date_str)
188
+ return nil if date.year > 9999 || date.year < 1000
189
+
190
+ date
191
+ rescue ArgumentError
192
+ nil
193
+ end
194
+
195
+ def parse_account_line(entry)
196
+ (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
197
+
198
+ return {
199
+ name: account_name,
200
+ amount: clean_money("")
201
+ } if rest.nil? || rest.empty?
202
+
203
+ (value, _comment) = rest.split(/;/)
204
+ return {
205
+ name: account_name,
206
+ amount: clean_money(value || "")
207
+ }
208
+ end
209
+
210
+ def balance(accounts)
211
+ return accounts unless accounts.any? { |i| i[:amount].nil? }
212
+
213
+ sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
214
+ count = 0
215
+ accounts.each do |account|
216
+ next unless account[:amount].nil?
217
+
218
+ count += 1
219
+ account[:amount] = -sum
220
+ end
221
+ if count > 1
222
+ puts "Warning: unparsable entry due to more than one missing money value."
223
+ p accounts
224
+ puts
225
+ end
53
226
 
54
227
  accounts
55
228
  end
56
229
 
57
230
  def clean_money(money)
58
- return nil if money.nil? || money.length == 0
231
+ return nil if money.nil? || money.empty?
232
+
59
233
  money.gsub(/[^0-9.-]/, '').to_f
60
234
  end
61
235
  end
@@ -0,0 +1,4 @@
1
+ module Reckon
2
+ LOGGER = Logger.new(STDERR)
3
+ LOGGER.level = Logger::WARN
4
+ end
@@ -55,9 +55,9 @@ module Reckon
55
55
  any_number_regex = /^(.*?)([\d\.]+)/
56
56
 
57
57
  # Prefer matching the money_format, match any number otherwise
58
- m = value.match( money_format_regex ) ||
58
+ m = value.match( money_format_regex ) ||
59
59
  value.match( any_number_regex )
60
- if m
60
+ if m
61
61
  amount = m[2].to_f
62
62
  # Check whether the money had a - or (, which indicates negative amounts
63
63
  if (m[1].match( /^[\(-]/ ) || m[1].match( /-$/ ))
@@ -71,12 +71,13 @@ module Reckon
71
71
 
72
72
  def Money::likelihood( entry )
73
73
  money_score = 0
74
- money_score += 20 if entry[/^[\-\+\(]{0,2}\$/]
74
+ # digits separated by , or . with no more than 2 trailing digits
75
+ money_score += 40 if entry.match(/\d+[,.]\d{2}[^\d]*$/)
75
76
  money_score += 10 if entry[/^\$?\-?\$?\d+[\.,\d]*?[\.,]\d\d$/]
76
77
  money_score += 10 if entry[/\d+[\.,\d]*?[\.,]\d\d$/]
77
78
  money_score += entry.gsub(/[^\d\.\-\+,\(\)]/, '').length if entry.length < 7
78
- money_score -= entry.length if entry.length > 8
79
- money_score -= 20 if entry !~ /^[\$\+\.\-,\d\(\)]+$/
79
+ money_score -= entry.length if entry.length > 12
80
+ money_score -= 20 if (entry !~ /^[\$\+\.\-,\d\(\)]+$/) && entry.length > 0
80
81
  money_score
81
82
  end
82
83
  end
@@ -112,61 +113,4 @@ module Reckon
112
113
  self
113
114
  end
114
115
  end
115
-
116
- class DateColumn < Array
117
- attr_accessor :endian_precedence
118
- def initialize( arr = [], options = {} )
119
- arr.each do |value|
120
- if options[:date_format]
121
- begin
122
- value = Date.strptime(value, options[:date_format])
123
- rescue
124
- puts "I'm having trouble parsing #{value} with the desired format: #{options[:date_format]}"
125
- exit 1
126
- end
127
- else
128
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})\d+\[\d+\:GMT\]$/ # chase format
129
- value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\.(\d{2})\.(\d{4})$/ # german format
130
- value = [$3, $2, $1].join("/") if value =~ /^(\d{2})\-(\d{2})\-(\d{4})$/ # nordea format
131
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})\-(\d{2})\-(\d{2})$/ # yyyy-mm-dd format
132
- value = [$1, $2, $3].join("/") if value =~ /^(\d{4})(\d{2})(\d{2})/ # yyyymmdd format
133
-
134
-
135
- unless @endian_precedence # Try to detect endian_precedence
136
- reg_match = value.match( /^(\d\d)\/(\d\d)\/\d\d\d?\d?/ )
137
- # If first one is not \d\d/\d\d/\d\d\d?\d set it to default
138
- if !reg_match
139
- @endian_precedence = [:middle, :little]
140
- elsif reg_match[1].to_i > 12
141
- @endian_precedence = [:little]
142
- elsif reg_match[2].to_i > 12
143
- @endian_precedence = [:middle]
144
- end
145
- end
146
- end
147
- self.push( value )
148
- end
149
- # if endian_precedence still nil, raise error
150
- unless @endian_precedence || options[:date_format]
151
- raise( "Unable to determine date format. Please specify using --date-format" )
152
- end
153
- end
154
-
155
- def for( index )
156
- value = self.at( index )
157
- guess = Chronic.parse(value, :context => :past,
158
- :endian_precedence => @endian_precedence )
159
- if guess.to_i < 953236800 && value =~ /\//
160
- guess = Chronic.parse((value.split("/")[0...-1] + [(2000 + value.split("/").last.to_i).to_s]).join("/"), :context => :past,
161
- :endian_precedence => @endian_precedence)
162
- end
163
- guess
164
- end
165
-
166
- def pretty_for(index)
167
- self.for(index).strftime("%Y/%m/%d")
168
- end
169
-
170
- end
171
116
  end
172
-
@@ -0,0 +1,3 @@
1
+ module Reckon
2
+ VERSION = "0.5.4"
3
+ end
@@ -1,14 +1,15 @@
1
- # -*- encoding: utf-8 -*-
2
1
  $:.push File.expand_path("../lib", __FILE__)
2
+ require_relative 'lib/reckon/version'
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = %q{reckon}
6
- s.version = "0.4.4"
7
- s.authors = ["Andrew Cantino", "BlackEdder"]
6
+ s.version = Reckon::VERSION
7
+ s.authors = ["Andrew Cantino", "BlackEdder", "Ben Prew"]
8
8
  s.email = %q{andrew@iterationlabs.com}
9
9
  s.homepage = %q{https://github.com/cantino/reckon}
10
10
  s.description = %q{Reckon automagically converts CSV files for use with the command-line accounting tool Ledger. It also helps you to select the correct accounts associated with the CSV data using Bayesian machine learning.}
11
11
  s.summary = %q{Utility for interactively converting and labeling CSV files for the Ledger accounting tool.}
12
+ s.licenses = ['MIT']
12
13
 
13
14
  s.files = `git ls-files`.split("\n")
14
15
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
@@ -16,9 +17,11 @@ Gem::Specification.new do |s|
16
17
  s.require_paths = ["lib"]
17
18
 
18
19
  s.add_development_dependency "rspec", ">= 1.2.9"
19
- s.add_runtime_dependency "fastercsv", ">= 1.5.1"
20
+ s.add_development_dependency "pry", ">= 0.12.2"
21
+ s.add_development_dependency "rantly", "= 1.2.0"
22
+ s.add_development_dependency "github_changelog_generator"
20
23
  s.add_runtime_dependency "chronic", ">= 0.3.0"
21
24
  s.add_runtime_dependency "highline", ">= 1.5.2"
22
25
  s.add_runtime_dependency "terminal-table", ">= 1.4.2"
26
+ s.add_runtime_dependency "rchardet", ">= 1.8.0"
23
27
  end
24
-