reckon 0.6.0 → 0.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +50 -0
  3. data/.gitignore +3 -0
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +75 -7
  6. data/Gemfile.lock +1 -1
  7. data/README.md +85 -24
  8. data/Rakefile +17 -1
  9. data/bin/build-new-version.sh +26 -0
  10. data/bin/reckon +9 -1
  11. data/lib/reckon.rb +1 -0
  12. data/lib/reckon/app.rb +18 -141
  13. data/lib/reckon/cosine_similarity.rb +67 -62
  14. data/lib/reckon/csv_parser.rb +2 -7
  15. data/lib/reckon/date_column.rb +10 -0
  16. data/lib/reckon/money.rb +59 -52
  17. data/lib/reckon/options.rb +153 -0
  18. data/lib/reckon/version.rb +1 -1
  19. data/spec/cosine_training_and_test.rb +52 -0
  20. data/spec/integration/another_bank_example/input.csv +9 -0
  21. data/spec/integration/another_bank_example/output.ledger +36 -0
  22. data/spec/integration/another_bank_example/test_args +1 -0
  23. data/spec/integration/ask_for_account/cli_input.exp +33 -0
  24. data/spec/integration/ask_for_account/expected_output +11 -0
  25. data/spec/integration/ask_for_account/input.csv +9 -0
  26. data/spec/integration/ask_for_account/test_args +1 -0
  27. data/spec/integration/austrian_example/input.csv +13 -0
  28. data/spec/integration/austrian_example/output.ledger +52 -0
  29. data/spec/integration/austrian_example/test_args +2 -0
  30. data/spec/integration/bom_utf8_file/input.csv +3 -0
  31. data/spec/integration/bom_utf8_file/output.ledger +4 -0
  32. data/spec/integration/bom_utf8_file/test_args +3 -0
  33. data/spec/integration/broker_canada_example/input.csv +12 -0
  34. data/spec/integration/broker_canada_example/output.ledger +48 -0
  35. data/spec/integration/broker_canada_example/test_args +1 -0
  36. data/spec/integration/chase/account_tokens_and_regex/output.ledger +36 -0
  37. data/spec/integration/chase/account_tokens_and_regex/test_args +2 -0
  38. data/spec/integration/chase/account_tokens_and_regex/tokens.yml +16 -0
  39. data/spec/integration/chase/default_account_names/output.ledger +36 -0
  40. data/spec/integration/chase/default_account_names/test_args +3 -0
  41. data/spec/integration/chase/input.csv +9 -0
  42. data/spec/integration/chase/learn_from_existing/learn.ledger +7 -0
  43. data/spec/integration/chase/learn_from_existing/output.ledger +36 -0
  44. data/spec/integration/chase/learn_from_existing/test_args +1 -0
  45. data/spec/integration/chase/simple/output.ledger +36 -0
  46. data/spec/integration/chase/simple/test_args +1 -0
  47. data/spec/integration/danish_kroner_nordea_example/input.csv +6 -0
  48. data/spec/integration/danish_kroner_nordea_example/output.ledger +24 -0
  49. data/spec/integration/danish_kroner_nordea_example/test_args +1 -0
  50. data/spec/integration/english_date_example/input.csv +3 -0
  51. data/spec/integration/english_date_example/output.ledger +12 -0
  52. data/spec/integration/english_date_example/test_args +1 -0
  53. data/spec/integration/extratofake/input.csv +24 -0
  54. data/spec/integration/extratofake/output.ledger +92 -0
  55. data/spec/integration/extratofake/test_args +1 -0
  56. data/spec/integration/french_example/input.csv +9 -0
  57. data/spec/integration/french_example/output.ledger +36 -0
  58. data/spec/integration/french_example/test_args +2 -0
  59. data/spec/integration/german_date_example/input.csv +3 -0
  60. data/spec/integration/german_date_example/output.ledger +12 -0
  61. data/spec/integration/german_date_example/test_args +1 -0
  62. data/spec/integration/harder_date_example/input.csv +5 -0
  63. data/spec/integration/harder_date_example/output.ledger +20 -0
  64. data/spec/integration/harder_date_example/test_args +1 -0
  65. data/spec/integration/ing/input.csv +3 -0
  66. data/spec/integration/ing/output.ledger +12 -0
  67. data/spec/integration/ing/test_args +1 -0
  68. data/spec/integration/intuit_mint_example/input.csv +7 -0
  69. data/spec/integration/intuit_mint_example/output.ledger +28 -0
  70. data/spec/integration/intuit_mint_example/test_args +1 -0
  71. data/spec/integration/invalid_header_example/input.csv +6 -0
  72. data/spec/integration/invalid_header_example/output.ledger +8 -0
  73. data/spec/integration/invalid_header_example/test_args +1 -0
  74. data/spec/integration/inversed_credit_card/input.csv +16 -0
  75. data/spec/integration/inversed_credit_card/output.ledger +64 -0
  76. data/spec/integration/inversed_credit_card/test_args +1 -0
  77. data/spec/integration/nationwide/input.csv +4 -0
  78. data/spec/integration/nationwide/output.ledger +16 -0
  79. data/spec/integration/nationwide/test_args +1 -0
  80. data/spec/integration/regression/issue_51_account_tokens/input.csv +8 -0
  81. data/spec/integration/regression/issue_51_account_tokens/output.ledger +32 -0
  82. data/spec/integration/regression/issue_51_account_tokens/test_args +4 -0
  83. data/spec/integration/regression/issue_51_account_tokens/tokens.yml +9 -0
  84. data/spec/integration/regression/issue_64_date_column/input.csv +3 -0
  85. data/spec/integration/regression/issue_64_date_column/output.ledger +8 -0
  86. data/spec/integration/regression/issue_64_date_column/test_args +1 -0
  87. data/spec/integration/regression/issue_73_account_token_matching/input.csv +2 -0
  88. data/spec/integration/regression/issue_73_account_token_matching/output.ledger +4 -0
  89. data/spec/integration/regression/issue_73_account_token_matching/test_args +6 -0
  90. data/spec/integration/regression/issue_73_account_token_matching/tokens.yml +8 -0
  91. data/spec/integration/regression/issue_85_date_example/input.csv +2 -0
  92. data/spec/integration/regression/issue_85_date_example/output.ledger +8 -0
  93. data/spec/integration/regression/issue_85_date_example/test_args +1 -0
  94. data/spec/integration/spanish_date_example/input.csv +3 -0
  95. data/spec/integration/spanish_date_example/output.ledger +12 -0
  96. data/spec/integration/spanish_date_example/test_args +1 -0
  97. data/spec/integration/suntrust/input.csv +7 -0
  98. data/spec/integration/suntrust/output.ledger +28 -0
  99. data/spec/integration/suntrust/test_args +1 -0
  100. data/spec/integration/test.sh +123 -0
  101. data/spec/integration/test_money_column/input.csv +3 -0
  102. data/spec/integration/test_money_column/output.ledger +8 -0
  103. data/spec/integration/test_money_column/test_args +1 -0
  104. data/spec/integration/two_money_columns/input.csv +5 -0
  105. data/spec/integration/two_money_columns/output.ledger +20 -0
  106. data/spec/integration/two_money_columns/test_args +1 -0
  107. data/spec/integration/yyyymmdd_date_example/input.csv +1 -0
  108. data/spec/integration/yyyymmdd_date_example/output.ledger +4 -0
  109. data/spec/integration/yyyymmdd_date_example/test_args +1 -0
  110. data/spec/reckon/app_spec.rb +24 -6
  111. data/spec/reckon/csv_parser_spec.rb +3 -3
  112. data/spec/reckon/money_column_spec.rb +24 -24
  113. data/spec/reckon/money_spec.rb +15 -34
  114. data/spec/reckon/options_spec.rb +17 -0
  115. data/spec/spec_helper.rb +6 -1
  116. metadata +102 -7
  117. data/.travis.yml +0 -13
data/lib/reckon.rb CHANGED
@@ -16,4 +16,5 @@ require_relative 'reckon/date_column'
16
16
  require_relative 'reckon/money'
17
17
  require_relative 'reckon/ledger_parser'
18
18
  require_relative 'reckon/csv_parser'
19
+ require_relative 'reckon/options'
19
20
  require_relative 'reckon/app'
data/lib/reckon/app.rb CHANGED
@@ -8,9 +8,10 @@ module Reckon
8
8
  attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
9
9
  @@cli = HighLine.new
10
10
 
11
- def initialize(options = {})
11
+ def initialize(opts = {})
12
+ self.options = opts
12
13
  LOGGER.level = Logger::INFO if options[:verbose]
13
- self.options = options
14
+
14
15
  self.regexps = {}
15
16
  self.seen = Set.new
16
17
  self.options[:currency] ||= '$'
@@ -19,10 +20,10 @@ module Reckon
19
20
  learn!
20
21
  end
21
22
 
22
- def interactive_output(str)
23
+ def interactive_output(str, fh = $stdout)
23
24
  return if options[:unattended]
24
25
 
25
- puts str
26
+ fh.puts str
26
27
  end
27
28
 
28
29
  def learn!
@@ -157,10 +158,10 @@ module Reckon
157
158
  :money => @csv_parser.money_for(index),
158
159
  :description => @csv_parser.description_for(index) }
159
160
  end
160
- rows.sort_by { |n| n[:date] }.each { |row| yield row }
161
+ rows.sort_by { |n| [n[:date], -n[:money], n[:description]] }.each { |row| yield row }
161
162
  end
162
163
 
163
- def print_transaction(rows)
164
+ def print_transaction(rows, fh = $stdout)
164
165
  str = "\n"
165
166
  header = %w[Date Amount Description Note]
166
167
  maxes = header.map(&:length)
@@ -184,7 +185,7 @@ module Reckon
184
185
  str += "\n"
185
186
  end
186
187
 
187
- interactive_output str
188
+ interactive_output str, fh
188
189
  end
189
190
 
190
191
  def ask_account_question(msg, row)
@@ -192,11 +193,13 @@ module Reckon
192
193
  LOGGER.info "possible_answers===> #{possible_answers.inspect}"
193
194
 
194
195
  if options[:unattended]
195
- default = if row[:pretty_money][0] == '-'
196
- options[:default_into_account] || 'Expenses:Unknown'
197
- else
198
- options[:default_outof_account] || 'Income:Unknown'
199
- end
196
+ if options[:fail_on_unknown_account] && possible_answers.empty?
197
+ raise %(Couldn't find any matches for '#{row[:description]}'
198
+ Try adding an account token with --account-tokens)
199
+ end
200
+
201
+ default = options[:default_outof_account]
202
+ default = options[:default_into_account] if row[:pretty_money][0] == '-'
200
203
  return possible_answers[0] || default
201
204
  end
202
205
 
@@ -252,7 +255,7 @@ module Reckon
252
255
  end
253
256
 
254
257
  def ledger_format(row, line1, line2)
255
- out = "#{row[:pretty_date]}\t#{row[:description]}\t; #{row[:note]}\n"
258
+ out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
256
259
  out += "\t#{line1.first}\t\t\t#{line1.last}\n"
257
260
  out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
258
261
  out
@@ -277,138 +280,12 @@ module Reckon
277
280
  exit
278
281
  end
279
282
 
280
- def output_table
283
+ def output_table(fh = $stdout)
281
284
  rows = []
282
285
  each_row_backwards do |row|
283
286
  rows << row
284
287
  end
285
- print_transaction(rows)
286
- end
287
-
288
- def self.parse_opts(args = ARGV)
289
- options = { :output_file => STDOUT }
290
- parser = OptionParser.new do |opts|
291
- opts.banner = "Usage: Reckon.rb [options]"
292
- opts.separator ""
293
-
294
- opts.on("-f", "--file FILE", "The CSV file to parse") do |file|
295
- options[:file] = file
296
- end
297
-
298
- opts.on("-a", "--account NAME", "The Ledger Account this file is for") do |a|
299
- options[:bank_account] = a
300
- end
301
-
302
- opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
303
- options[:verbose] = v
304
- end
305
-
306
- opts.on("-i", "--inverse", "Use the negative of each amount") do |v|
307
- options[:inverse] = v
308
- end
309
-
310
- opts.on("-p", "--print-table", "Print out the parsed CSV in table form") do |p|
311
- options[:print_table] = p
312
- end
313
-
314
- opts.on("-o", "--output-file FILE", "The ledger file to append to") do |o|
315
- options[:output_file] = File.open(o, 'a')
316
- end
317
-
318
- opts.on("-l", "--learn-from FILE", "An existing ledger file to learn accounts from") do |l|
319
- options[:existing_ledger_file] = l
320
- end
321
-
322
- opts.on("", "--ignore-columns 1,2,5", "Columns to ignore in the CSV file - the first column is column 1") do |ignore|
323
- options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
324
- end
325
-
326
- opts.on("", "--money-column 2", Integer, "Specify the money column instead of letting Reckon guess - the first column is column 1") do |column_number|
327
- options[:money_column] = column_number
328
- end
329
-
330
- opts.on("", "--date-column 3", Integer, "Specify the date column instead of letting Reckon guess - the first column is column 1") do |column_number|
331
- options[:date_column] = column_number
332
- end
333
-
334
- opts.on("", "--contains-header [N]", "The first row of the CSV is a header and should be skipped. Optionally add the number of rows to skip.") do |contains_header|
335
- options[:contains_header] = 1
336
- options[:contains_header] = contains_header.to_i if contains_header
337
- end
338
-
339
- opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
340
- options[:csv_separator] = csv_separator
341
- end
342
-
343
- opts.on("", "--comma-separates-cents", "Use comma instead of period to deliminate dollars from cents when parsing ($100,50 instead of $100.50)") do |c|
344
- options[:comma_separates_cents] = c
345
- end
346
-
347
- opts.on("", "--encoding 'UTF-8'", "Specify an encoding for the CSV file; not usually needed") do |e|
348
- options[:encoding] = e
349
- end
350
-
351
- opts.on("-c", "--currency '$'", "Currency symbol to use, defaults to $ (£, EUR)") do |e|
352
- options[:currency] = e
353
- end
354
-
355
- opts.on("", "--date-format '%d/%m/%Y'", "Force the date format (see Ruby DateTime strftime)") do |d|
356
- options[:date_format] = d
357
- end
358
-
359
- opts.on("-u", "--unattended", "Don't ask questions and guess all the accounts automatically. Used with --learn-from or --account-tokens options.") do |n|
360
- options[:unattended] = n
361
- end
362
-
363
- opts.on("-t", "--account-tokens FILE", "YAML file with manually-assigned tokens for each account (see README)") do |a|
364
- options[:account_tokens_file] = a
365
- end
366
-
367
- opts.on("", "--default-into-account NAME", "Default into account") do |a|
368
- options[:default_into_account] = a
369
- end
370
-
371
- opts.on("", "--default-outof-account NAME", "Default 'out of' account") do |a|
372
- options[:default_outof_account] = a
373
- end
374
-
375
- opts.on("", "--suffixed", "If --currency should be used as a suffix. Defaults to false.") do |e|
376
- options[:suffixed] = e
377
- end
378
-
379
- opts.on_tail("-h", "--help", "Show this message") do
380
- puts opts
381
- exit
382
- end
383
-
384
- opts.on_tail("--version", "Show version") do
385
- puts VERSION
386
- exit
387
- end
388
-
389
- opts.parse!(args)
390
- end
391
-
392
- unless options[:file]
393
- options[:file] = @@cli.ask("What CSV file should I parse? ")
394
- unless options[:file].length > 0
395
- puts "\nYou must provide a CSV file to parse.\n"
396
- puts parser
397
- exit
398
- end
399
- end
400
-
401
- unless options[:bank_account]
402
- fail "Please specify --account for the unattended mode" if options[:unattended]
403
-
404
- options[:bank_account] = @@cli.ask("What is the account name of this bank account in Ledger? ") do |q|
405
- q.readline = true
406
- q.validate = /^.{2,}$/
407
- q.default = "Assets:Bank:Checking"
408
- end
409
- end
410
-
411
- options
288
+ print_transaction(rows, fh)
412
289
  end
413
290
  end
414
291
  end
@@ -1,47 +1,52 @@
1
1
  require 'matrix'
2
2
  require 'set'
3
3
 
4
- # Implementation of consine similarity using TF-IDF for vectorization.
5
- # Used to suggest which account a transaction should be assigned to
4
+ # Implementation of cosine similarity using TF-IDF for vectorization.
5
+ #
6
+ # In information retrieval, tf–idf, short for term frequency–inverse document frequency,
7
+ # is a numerical statistic that is intended to reflect how important a word is to a
8
+ # document in a collection or corpus
9
+ #
10
+ # Cosine similarity is a measurement of how similar 2 documents are to each other.
11
+ #
12
+ # These weights and measures are used to suggest which account a transaction should be
13
+ # assigned to.
6
14
  module Reckon
7
15
  class CosineSimilarity
16
+ DocumentInfo = Struct.new(:tokens, :accounts)
17
+
8
18
  def initialize(options)
19
+ @docs = DocumentInfo.new({}, {})
9
20
  @options = options
10
- @tokens = {}
11
- @accounts = Hash.new(0)
12
21
  end
13
22
 
14
23
  def add_document(account, doc)
15
- tokenize(doc).each do |n|
24
+ tokens = tokenize(doc)
25
+ LOGGER.info "doc tokens: #{tokens}"
26
+ tokens.each do |n|
16
27
  (token, count) = n
17
28
 
18
- @tokens[token] ||= {}
19
- @tokens[token][account] ||= 0
20
- @tokens[token][account] += count
21
- @accounts[account] += count
29
+ @docs.tokens[token] ||= Hash.new(0)
30
+ @docs.tokens[token][account] += count
31
+ @docs.accounts[account] ||= Hash.new(0)
32
+ @docs.accounts[account][token] += count
22
33
  end
23
34
  end
24
35
 
25
36
  # find most similar documents to query
26
37
  def find_similar(query)
27
- (query_scores, corpus_scores) = td_idf_scores_for(query)
38
+ LOGGER.info "find_similar #{query}"
28
39
 
29
- query_vector = Vector.elements(query_scores, false)
40
+ accounts = docs_to_check(query).map do |a|
41
+ [a, tfidf(@docs.accounts[a])]
42
+ end
30
43
 
31
- # For each doc, calculate the similarity to the query
32
- suggestions = corpus_scores.map do |account, scores|
33
- acct_vector = Vector.elements(scores, false)
44
+ q = tfidf(tokenize(query))
34
45
 
35
- acct_query_dp = acct_vector.inner_product(query_vector)
36
- # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
37
- # exactly opposite
38
- # see https://en.wikipedia.org/wiki/Cosine_similarity
39
- # cos(theta) = (A . B) / (||A|| ||B||)
40
- # where A . B is the "dot product" and ||A|| is the magnitude of A
41
- # ruby has the 'matrix' library we can use to do these calculations.
46
+ suggestions = accounts.map do |a, d|
42
47
  {
43
- similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
44
- account: account,
48
+ similarity: calc_similarity(q, d),
49
+ account: a
45
50
  }
46
51
  end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
47
52
 
@@ -52,50 +57,51 @@ module Reckon
52
57
 
53
58
  private
54
59
 
55
- def td_idf_scores_for(query)
56
- query_tokens = tokenize(query)
57
- corpus = Set.new
58
- corpus_scores = {}
59
- query_scores = []
60
- num_docs = @accounts.length
61
-
62
- query_tokens.each do |n|
63
- (token, _count) = n
64
- next unless @tokens[token]
65
- corpus = corpus.union(Set.new(@tokens[token].keys))
60
+ def docs_to_check(query)
61
+ return tokenize(query).reduce(Set.new) do |corpus, t|
62
+ corpus.union(Set.new(@docs.tokens[t[0]]&.keys))
66
63
  end
64
+ end
67
65
 
68
- query_tokens.each do |n|
69
- (token, count) = n
70
-
71
- # if no other docs have token, ignore it
72
- next unless @tokens[token]
66
+ def tfidf(tokens)
67
+ scores = {}
73
68
 
74
- ## First, calculate scores for our query as we're building scores for the corpus
75
- query_scores << calc_tf_idf(
76
- count,
77
- query_tokens.length,
78
- @tokens[token].length,
79
- num_docs
69
+ tokens.each do |t, n|
70
+ scores[t] = calc_tf_idf(
71
+ n,
72
+ tokens.length,
73
+ @docs.tokens[t]&.length&.to_f || 0,
74
+ @docs.accounts.length
80
75
  )
81
-
82
- ## Next, calculate for the corpus, where our "account" is a document
83
- corpus.each do |account|
84
- corpus_scores[account] ||= []
85
-
86
- corpus_scores[account] << calc_tf_idf(
87
- (@tokens[token][account] || 0),
88
- @accounts[account].to_f,
89
- @tokens[token].length.to_f,
90
- num_docs
91
- )
92
- end
93
76
  end
94
- [query_scores, corpus_scores]
77
+
78
+ return scores
95
79
  end
96
80
 
97
- def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
81
+ # Cosine similarity is used to compare how similar 2 documents are. Returns a float
82
+ # between 1 and -1, where 1 is exactly the same and -1 is exactly opposite.
83
+ #
84
+ # see https://en.wikipedia.org/wiki/Cosine_similarity
85
+ # cos(theta) = (A . B) / (||A|| ||B||)
86
+ # where A . B is the "dot product" and ||A|| is the magnitude of A
87
+ #
88
+ # The variables (vector dimensions) of A and B are the unique terms in q and d.
89
+ #
90
+ # For example, when q = "big red balloon" and d = "small green balloon" then the
91
+ # variables are (big,red,balloon,small,green) and a = (1,1,1,0,0) and b =
92
+ # (0,0,1,1,1).
93
+ #
94
+ # query and doc are hashes of token => tf/idf score
95
+ def calc_similarity(query, doc)
96
+ tokens = Set.new(query.keys + doc.keys)
97
+
98
+ a = Vector.elements(tokens.map { |n| query[n] || 0 }, false)
99
+ b = Vector.elements(tokens.map { |n| doc[n] || 0 }, false)
100
+
101
+ return a.inner_product(b) / (a.magnitude * b.magnitude)
102
+ end
98
103
 
104
+ def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
99
105
  # tf(t,d) = count of t in d / number of words in d
100
106
  tf = token_count / num_words_in_doc.to_f
101
107
 
@@ -109,14 +115,13 @@ module Reckon
109
115
  end
110
116
 
111
117
  def tokenize(str)
112
- mk_tokens(str).inject(Hash.new(0)) do |memo, n|
118
+ mk_tokens(str).each_with_object(Hash.new(0)) do |n, memo|
113
119
  memo[n] += 1
114
- memo
115
120
  end.to_a
116
121
  end
117
122
 
118
123
  def mk_tokens(str)
119
- str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
124
+ str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/).reject(&:empty?)
120
125
  end
121
126
  end
122
127
  end
@@ -89,12 +89,7 @@ module Reckon
89
89
  money_score += Money::likelihood( entry )
90
90
  possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
91
91
  possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
92
- date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
93
- date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
94
- date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
95
- date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
96
- date_score += 30 if entry =~ /^\d+[:\/\.-]\d+[:\/\.-]\d+([ :]\d+[:\/\.]\d+)?$/
97
- date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
92
+ date_score += DateColumn.likelihood(entry)
98
93
 
99
94
  # Try to determine if this is a balance column
100
95
  entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
@@ -168,7 +163,7 @@ module Reckon
168
163
  results = evaluate_columns(columns)
169
164
 
170
165
  if options[:money_column]
171
- self.money_column_indices = [ options[:money_column] - 1 ]
166
+ self.money_column_indices = [options[:money_column] - 1]
172
167
  else
173
168
  self.money_column_indices = results.select { |n| n[:is_money_column] }.map { |n| n[:index] }
174
169
  if self.money_column_indices.length == 1
@@ -56,5 +56,15 @@ module Reckon
56
56
  date.iso8601
57
57
  end
58
58
 
59
+ def self.likelihood(entry)
60
+ date_score = 0
61
+ date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
62
+ date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
63
+ date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
64
+ date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
65
+ date_score += 30 if entry =~ /^\d+[:\/\.-]\d+[:\/\.-]\d+([ :]\d+[:\/\.]\d+)?$/
66
+ date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
67
+ return date_score
68
+ end
59
69
  end
60
70
  end
data/lib/reckon/money.rb CHANGED
@@ -5,12 +5,13 @@ module Reckon
5
5
  class Money
6
6
  include Comparable
7
7
  attr_accessor :amount, :currency, :suffixed
8
- def initialize( amount, options = {} )
9
- if options[:inverse]
10
- @amount = -1*amount.to_f
11
- else
12
- @amount = amount.to_f
13
- end
8
+ def initialize(amount, options = {})
9
+ @options = options
10
+ @amount_raw = amount
11
+ @raw = options[:raw]
12
+
13
+ @amount = parse(amount, options)
14
+ @amount = -@amount if options[:inverse]
14
15
  @currency = options[:currency] || "$"
15
16
  @suffixed = options[:suffixed]
16
17
  end
@@ -19,11 +20,19 @@ module Reckon
19
20
  return @amount
20
21
  end
21
22
 
23
+ def to_s
24
+ return @options[:raw] ? "#{@amount_raw} | #{@amount}" : @amount
25
+ end
26
+
27
+ # unary minus
28
+ # ex
29
+ # m = Money.new
30
+ # -m
22
31
  def -@
23
- Money.new( -@amount, :currency => @currency, :suffixed => @suffixed )
32
+ Money.new(-@amount, :currency => @currency, :suffixed => @suffixed)
24
33
  end
25
34
 
26
- def <=>( mon )
35
+ def <=>(mon)
27
36
  other_amount = mon.to_f
28
37
  if @amount < other_amount
29
38
  -1
@@ -34,42 +43,41 @@ module Reckon
34
43
  end
35
44
  end
36
45
 
37
- def pretty( negate = false )
38
- if @suffixed
39
- (@amount >= 0 ? " " : "") + sprintf("%0.2f #{@currency}", @amount * (negate ? -1 : 1))
40
- else
41
- (@amount >= 0 ? " " : "") + sprintf("%0.2f", @amount * (negate ? -1 : 1)).gsub(/^((\-)|)(?=\d)/, "\\1#{@currency}")
46
+ def pretty(negate = false)
47
+ if @raw
48
+ return @amount_raw unless negate
49
+
50
+ return @amount_raw[0] == '-' ? @amount_raw[1..-1] : "-#{@amount_raw}"
42
51
  end
52
+
53
+ amt = pretty_amount(@amount * (negate ? -1 : 1))
54
+ amt = if @suffixed
55
+ "#{amt} #{@currency}"
56
+ else
57
+ amt.gsub(/^((-)|)(?=\d)/, "\\1#{@currency}")
58
+ end
59
+
60
+ return (@amount >= 0 ? " " : "") + amt
61
+ end
62
+
63
+ def pretty_amount(amount)
64
+ sprintf("%0.2f", amount).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
43
65
  end
44
66
 
45
- def Money::from_s( value, options = {} )
67
+ def parse(value, options = {})
68
+ value = value.to_s
46
69
  # Empty string is treated as money with value 0
47
- return Money.new( 0.00, options ) if value.empty?
48
-
49
- # Remove 1000 separaters and replace , with . if comma_separates_cents
50
- # 1.000,00 -> 1000.00
51
- value = value.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
52
- value = value.gsub(/,/, '')
53
-
54
- money_format_regex = /^(.*?)(\d+\.\d\d)/ # Money has two decimal precision
55
- any_number_regex = /^(.*?)([\d\.]+)/
56
-
57
- # Prefer matching the money_format, match any number otherwise
58
- m = value.match( money_format_regex ) ||
59
- value.match( any_number_regex )
60
- if m
61
- amount = m[2].to_f
62
- # Check whether the money had a - or (, which indicates negative amounts
63
- if (m[1].match( /^[\(-]/ ) || m[1].match( /-$/ ))
64
- amount *= -1
65
- end
66
- return Money.new( amount, options )
67
- else
68
- return nil
69
- end
70
+ return value.to_f if value.to_s.empty?
71
+
72
+ invert = value.match(/^\(.*\)$/)
73
+ value = value.gsub(/[^0-9,.-]/, '')
74
+ value = value.tr('.', '').tr(',', '.') if options[:comma_separates_cents]
75
+ value = value.tr(',', '')
76
+ value = value.to_f
77
+ return invert ? -value : value
70
78
  end
71
79
 
72
- def Money::likelihood( entry )
80
+ def Money::likelihood(entry)
73
81
  money_score = 0
74
82
  # digits separated by , or . with no more than 2 trailing digits
75
83
  money_score += 40 if entry.match(/\d+[,.]\d{2}[^\d]*$/)
@@ -83,31 +91,30 @@ module Reckon
83
91
  end
84
92
 
85
93
  class MoneyColumn < Array
86
- def initialize( arr = [], options = {} )
87
- arr.each { |str| self.push( Money.from_s( str, options ) ) }
94
+ def initialize(arr = [], options = {})
95
+ arr.each { |str| push(Money.new(str, options)) }
88
96
  end
89
97
 
90
98
  def positive?
91
- self.each do |money|
92
- return false if money < 0 if money
99
+ each do |money|
100
+ return false if money && money < 0
93
101
  end
94
102
  true
95
103
  end
96
104
 
97
- def merge!( other_column )
105
+ def merge!(other_column)
98
106
  invert = false
99
- invert = true if self.positive? && other_column.positive?
100
- self.each_with_index do |mon, i|
107
+ invert = true if positive? && other_column.positive?
108
+ each_with_index do |mon, i|
101
109
  other = other_column[i]
102
- return nil if (!mon || !other)
103
- if mon != 0.00 && other == 0.0
104
- if invert
105
- self[i]= -mon
106
- end
107
- elsif mon == 0.00 && other != 0.00
110
+ return nil if !mon || !other
111
+
112
+ if mon != 0.0 && other == 0.0
113
+ self[i] = -mon if invert
114
+ elsif mon == 0.0 && other != 0.0
108
115
  self[i] = other
109
116
  else
110
- return nil
117
+ self[i] = Money.new(0)
111
118
  end
112
119
  end
113
120
  self