reckon 0.6.0 → 0.7.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +50 -0
  3. data/.gitignore +3 -0
  4. data/.ruby-version +1 -1
  5. data/CHANGELOG.md +75 -7
  6. data/Gemfile.lock +1 -1
  7. data/README.md +85 -24
  8. data/Rakefile +17 -1
  9. data/bin/build-new-version.sh +26 -0
  10. data/bin/reckon +9 -1
  11. data/lib/reckon.rb +1 -0
  12. data/lib/reckon/app.rb +18 -141
  13. data/lib/reckon/cosine_similarity.rb +67 -62
  14. data/lib/reckon/csv_parser.rb +2 -7
  15. data/lib/reckon/date_column.rb +10 -0
  16. data/lib/reckon/money.rb +59 -52
  17. data/lib/reckon/options.rb +153 -0
  18. data/lib/reckon/version.rb +1 -1
  19. data/spec/cosine_training_and_test.rb +52 -0
  20. data/spec/integration/another_bank_example/input.csv +9 -0
  21. data/spec/integration/another_bank_example/output.ledger +36 -0
  22. data/spec/integration/another_bank_example/test_args +1 -0
  23. data/spec/integration/ask_for_account/cli_input.exp +33 -0
  24. data/spec/integration/ask_for_account/expected_output +11 -0
  25. data/spec/integration/ask_for_account/input.csv +9 -0
  26. data/spec/integration/ask_for_account/test_args +1 -0
  27. data/spec/integration/austrian_example/input.csv +13 -0
  28. data/spec/integration/austrian_example/output.ledger +52 -0
  29. data/spec/integration/austrian_example/test_args +2 -0
  30. data/spec/integration/bom_utf8_file/input.csv +3 -0
  31. data/spec/integration/bom_utf8_file/output.ledger +4 -0
  32. data/spec/integration/bom_utf8_file/test_args +3 -0
  33. data/spec/integration/broker_canada_example/input.csv +12 -0
  34. data/spec/integration/broker_canada_example/output.ledger +48 -0
  35. data/spec/integration/broker_canada_example/test_args +1 -0
  36. data/spec/integration/chase/account_tokens_and_regex/output.ledger +36 -0
  37. data/spec/integration/chase/account_tokens_and_regex/test_args +2 -0
  38. data/spec/integration/chase/account_tokens_and_regex/tokens.yml +16 -0
  39. data/spec/integration/chase/default_account_names/output.ledger +36 -0
  40. data/spec/integration/chase/default_account_names/test_args +3 -0
  41. data/spec/integration/chase/input.csv +9 -0
  42. data/spec/integration/chase/learn_from_existing/learn.ledger +7 -0
  43. data/spec/integration/chase/learn_from_existing/output.ledger +36 -0
  44. data/spec/integration/chase/learn_from_existing/test_args +1 -0
  45. data/spec/integration/chase/simple/output.ledger +36 -0
  46. data/spec/integration/chase/simple/test_args +1 -0
  47. data/spec/integration/danish_kroner_nordea_example/input.csv +6 -0
  48. data/spec/integration/danish_kroner_nordea_example/output.ledger +24 -0
  49. data/spec/integration/danish_kroner_nordea_example/test_args +1 -0
  50. data/spec/integration/english_date_example/input.csv +3 -0
  51. data/spec/integration/english_date_example/output.ledger +12 -0
  52. data/spec/integration/english_date_example/test_args +1 -0
  53. data/spec/integration/extratofake/input.csv +24 -0
  54. data/spec/integration/extratofake/output.ledger +92 -0
  55. data/spec/integration/extratofake/test_args +1 -0
  56. data/spec/integration/french_example/input.csv +9 -0
  57. data/spec/integration/french_example/output.ledger +36 -0
  58. data/spec/integration/french_example/test_args +2 -0
  59. data/spec/integration/german_date_example/input.csv +3 -0
  60. data/spec/integration/german_date_example/output.ledger +12 -0
  61. data/spec/integration/german_date_example/test_args +1 -0
  62. data/spec/integration/harder_date_example/input.csv +5 -0
  63. data/spec/integration/harder_date_example/output.ledger +20 -0
  64. data/spec/integration/harder_date_example/test_args +1 -0
  65. data/spec/integration/ing/input.csv +3 -0
  66. data/spec/integration/ing/output.ledger +12 -0
  67. data/spec/integration/ing/test_args +1 -0
  68. data/spec/integration/intuit_mint_example/input.csv +7 -0
  69. data/spec/integration/intuit_mint_example/output.ledger +28 -0
  70. data/spec/integration/intuit_mint_example/test_args +1 -0
  71. data/spec/integration/invalid_header_example/input.csv +6 -0
  72. data/spec/integration/invalid_header_example/output.ledger +8 -0
  73. data/spec/integration/invalid_header_example/test_args +1 -0
  74. data/spec/integration/inversed_credit_card/input.csv +16 -0
  75. data/spec/integration/inversed_credit_card/output.ledger +64 -0
  76. data/spec/integration/inversed_credit_card/test_args +1 -0
  77. data/spec/integration/nationwide/input.csv +4 -0
  78. data/spec/integration/nationwide/output.ledger +16 -0
  79. data/spec/integration/nationwide/test_args +1 -0
  80. data/spec/integration/regression/issue_51_account_tokens/input.csv +8 -0
  81. data/spec/integration/regression/issue_51_account_tokens/output.ledger +32 -0
  82. data/spec/integration/regression/issue_51_account_tokens/test_args +4 -0
  83. data/spec/integration/regression/issue_51_account_tokens/tokens.yml +9 -0
  84. data/spec/integration/regression/issue_64_date_column/input.csv +3 -0
  85. data/spec/integration/regression/issue_64_date_column/output.ledger +8 -0
  86. data/spec/integration/regression/issue_64_date_column/test_args +1 -0
  87. data/spec/integration/regression/issue_73_account_token_matching/input.csv +2 -0
  88. data/spec/integration/regression/issue_73_account_token_matching/output.ledger +4 -0
  89. data/spec/integration/regression/issue_73_account_token_matching/test_args +6 -0
  90. data/spec/integration/regression/issue_73_account_token_matching/tokens.yml +8 -0
  91. data/spec/integration/regression/issue_85_date_example/input.csv +2 -0
  92. data/spec/integration/regression/issue_85_date_example/output.ledger +8 -0
  93. data/spec/integration/regression/issue_85_date_example/test_args +1 -0
  94. data/spec/integration/spanish_date_example/input.csv +3 -0
  95. data/spec/integration/spanish_date_example/output.ledger +12 -0
  96. data/spec/integration/spanish_date_example/test_args +1 -0
  97. data/spec/integration/suntrust/input.csv +7 -0
  98. data/spec/integration/suntrust/output.ledger +28 -0
  99. data/spec/integration/suntrust/test_args +1 -0
  100. data/spec/integration/test.sh +123 -0
  101. data/spec/integration/test_money_column/input.csv +3 -0
  102. data/spec/integration/test_money_column/output.ledger +8 -0
  103. data/spec/integration/test_money_column/test_args +1 -0
  104. data/spec/integration/two_money_columns/input.csv +5 -0
  105. data/spec/integration/two_money_columns/output.ledger +20 -0
  106. data/spec/integration/two_money_columns/test_args +1 -0
  107. data/spec/integration/yyyymmdd_date_example/input.csv +1 -0
  108. data/spec/integration/yyyymmdd_date_example/output.ledger +4 -0
  109. data/spec/integration/yyyymmdd_date_example/test_args +1 -0
  110. data/spec/reckon/app_spec.rb +24 -6
  111. data/spec/reckon/csv_parser_spec.rb +3 -3
  112. data/spec/reckon/money_column_spec.rb +24 -24
  113. data/spec/reckon/money_spec.rb +15 -34
  114. data/spec/reckon/options_spec.rb +17 -0
  115. data/spec/spec_helper.rb +6 -1
  116. metadata +102 -7
  117. data/.travis.yml +0 -13
data/lib/reckon.rb CHANGED
@@ -16,4 +16,5 @@ require_relative 'reckon/date_column'
16
16
  require_relative 'reckon/money'
17
17
  require_relative 'reckon/ledger_parser'
18
18
  require_relative 'reckon/csv_parser'
19
+ require_relative 'reckon/options'
19
20
  require_relative 'reckon/app'
data/lib/reckon/app.rb CHANGED
@@ -8,9 +8,10 @@ module Reckon
8
8
  attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
9
9
  @@cli = HighLine.new
10
10
 
11
- def initialize(options = {})
11
+ def initialize(opts = {})
12
+ self.options = opts
12
13
  LOGGER.level = Logger::INFO if options[:verbose]
13
- self.options = options
14
+
14
15
  self.regexps = {}
15
16
  self.seen = Set.new
16
17
  self.options[:currency] ||= '$'
@@ -19,10 +20,10 @@ module Reckon
19
20
  learn!
20
21
  end
21
22
 
22
- def interactive_output(str)
23
+ def interactive_output(str, fh = $stdout)
23
24
  return if options[:unattended]
24
25
 
25
- puts str
26
+ fh.puts str
26
27
  end
27
28
 
28
29
  def learn!
@@ -157,10 +158,10 @@ module Reckon
157
158
  :money => @csv_parser.money_for(index),
158
159
  :description => @csv_parser.description_for(index) }
159
160
  end
160
- rows.sort_by { |n| n[:date] }.each { |row| yield row }
161
+ rows.sort_by { |n| [n[:date], -n[:money], n[:description]] }.each { |row| yield row }
161
162
  end
162
163
 
163
- def print_transaction(rows)
164
+ def print_transaction(rows, fh = $stdout)
164
165
  str = "\n"
165
166
  header = %w[Date Amount Description Note]
166
167
  maxes = header.map(&:length)
@@ -184,7 +185,7 @@ module Reckon
184
185
  str += "\n"
185
186
  end
186
187
 
187
- interactive_output str
188
+ interactive_output str, fh
188
189
  end
189
190
 
190
191
  def ask_account_question(msg, row)
@@ -192,11 +193,13 @@ module Reckon
192
193
  LOGGER.info "possible_answers===> #{possible_answers.inspect}"
193
194
 
194
195
  if options[:unattended]
195
- default = if row[:pretty_money][0] == '-'
196
- options[:default_into_account] || 'Expenses:Unknown'
197
- else
198
- options[:default_outof_account] || 'Income:Unknown'
199
- end
196
+ if options[:fail_on_unknown_account] && possible_answers.empty?
197
+ raise %(Couldn't find any matches for '#{row[:description]}'
198
+ Try adding an account token with --account-tokens)
199
+ end
200
+
201
+ default = options[:default_outof_account]
202
+ default = options[:default_into_account] if row[:pretty_money][0] == '-'
200
203
  return possible_answers[0] || default
201
204
  end
202
205
 
@@ -252,7 +255,7 @@ module Reckon
252
255
  end
253
256
 
254
257
  def ledger_format(row, line1, line2)
255
- out = "#{row[:pretty_date]}\t#{row[:description]}\t; #{row[:note]}\n"
258
+ out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
256
259
  out += "\t#{line1.first}\t\t\t#{line1.last}\n"
257
260
  out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
258
261
  out
@@ -277,138 +280,12 @@ module Reckon
277
280
  exit
278
281
  end
279
282
 
280
- def output_table
283
+ def output_table(fh = $stdout)
281
284
  rows = []
282
285
  each_row_backwards do |row|
283
286
  rows << row
284
287
  end
285
- print_transaction(rows)
286
- end
287
-
288
- def self.parse_opts(args = ARGV)
289
- options = { :output_file => STDOUT }
290
- parser = OptionParser.new do |opts|
291
- opts.banner = "Usage: Reckon.rb [options]"
292
- opts.separator ""
293
-
294
- opts.on("-f", "--file FILE", "The CSV file to parse") do |file|
295
- options[:file] = file
296
- end
297
-
298
- opts.on("-a", "--account NAME", "The Ledger Account this file is for") do |a|
299
- options[:bank_account] = a
300
- end
301
-
302
- opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
303
- options[:verbose] = v
304
- end
305
-
306
- opts.on("-i", "--inverse", "Use the negative of each amount") do |v|
307
- options[:inverse] = v
308
- end
309
-
310
- opts.on("-p", "--print-table", "Print out the parsed CSV in table form") do |p|
311
- options[:print_table] = p
312
- end
313
-
314
- opts.on("-o", "--output-file FILE", "The ledger file to append to") do |o|
315
- options[:output_file] = File.open(o, 'a')
316
- end
317
-
318
- opts.on("-l", "--learn-from FILE", "An existing ledger file to learn accounts from") do |l|
319
- options[:existing_ledger_file] = l
320
- end
321
-
322
- opts.on("", "--ignore-columns 1,2,5", "Columns to ignore in the CSV file - the first column is column 1") do |ignore|
323
- options[:ignore_columns] = ignore.split(",").map { |i| i.to_i }
324
- end
325
-
326
- opts.on("", "--money-column 2", Integer, "Specify the money column instead of letting Reckon guess - the first column is column 1") do |column_number|
327
- options[:money_column] = column_number
328
- end
329
-
330
- opts.on("", "--date-column 3", Integer, "Specify the date column instead of letting Reckon guess - the first column is column 1") do |column_number|
331
- options[:date_column] = column_number
332
- end
333
-
334
- opts.on("", "--contains-header [N]", "The first row of the CSV is a header and should be skipped. Optionally add the number of rows to skip.") do |contains_header|
335
- options[:contains_header] = 1
336
- options[:contains_header] = contains_header.to_i if contains_header
337
- end
338
-
339
- opts.on("", "--csv-separator ','", "Separator for parsing the CSV - default is comma.") do |csv_separator|
340
- options[:csv_separator] = csv_separator
341
- end
342
-
343
- opts.on("", "--comma-separates-cents", "Use comma instead of period to deliminate dollars from cents when parsing ($100,50 instead of $100.50)") do |c|
344
- options[:comma_separates_cents] = c
345
- end
346
-
347
- opts.on("", "--encoding 'UTF-8'", "Specify an encoding for the CSV file; not usually needed") do |e|
348
- options[:encoding] = e
349
- end
350
-
351
- opts.on("-c", "--currency '$'", "Currency symbol to use, defaults to $ (£, EUR)") do |e|
352
- options[:currency] = e
353
- end
354
-
355
- opts.on("", "--date-format '%d/%m/%Y'", "Force the date format (see Ruby DateTime strftime)") do |d|
356
- options[:date_format] = d
357
- end
358
-
359
- opts.on("-u", "--unattended", "Don't ask questions and guess all the accounts automatically. Used with --learn-from or --account-tokens options.") do |n|
360
- options[:unattended] = n
361
- end
362
-
363
- opts.on("-t", "--account-tokens FILE", "YAML file with manually-assigned tokens for each account (see README)") do |a|
364
- options[:account_tokens_file] = a
365
- end
366
-
367
- opts.on("", "--default-into-account NAME", "Default into account") do |a|
368
- options[:default_into_account] = a
369
- end
370
-
371
- opts.on("", "--default-outof-account NAME", "Default 'out of' account") do |a|
372
- options[:default_outof_account] = a
373
- end
374
-
375
- opts.on("", "--suffixed", "If --currency should be used as a suffix. Defaults to false.") do |e|
376
- options[:suffixed] = e
377
- end
378
-
379
- opts.on_tail("-h", "--help", "Show this message") do
380
- puts opts
381
- exit
382
- end
383
-
384
- opts.on_tail("--version", "Show version") do
385
- puts VERSION
386
- exit
387
- end
388
-
389
- opts.parse!(args)
390
- end
391
-
392
- unless options[:file]
393
- options[:file] = @@cli.ask("What CSV file should I parse? ")
394
- unless options[:file].length > 0
395
- puts "\nYou must provide a CSV file to parse.\n"
396
- puts parser
397
- exit
398
- end
399
- end
400
-
401
- unless options[:bank_account]
402
- fail "Please specify --account for the unattended mode" if options[:unattended]
403
-
404
- options[:bank_account] = @@cli.ask("What is the account name of this bank account in Ledger? ") do |q|
405
- q.readline = true
406
- q.validate = /^.{2,}$/
407
- q.default = "Assets:Bank:Checking"
408
- end
409
- end
410
-
411
- options
288
+ print_transaction(rows, fh)
412
289
  end
413
290
  end
414
291
  end
data/lib/reckon/cosine_similarity.rb CHANGED
@@ -1,47 +1,52 @@
1
1
  require 'matrix'
2
2
  require 'set'
3
3
 
4
- # Implementation of consine similarity using TF-IDF for vectorization.
5
- # Used to suggest which account a transaction should be assigned to
4
+ # Implementation of cosine similarity using TF-IDF for vectorization.
5
+ #
6
+ # In information retrieval, tf–idf, short for term frequency–inverse document frequency,
7
+ # is a numerical statistic that is intended to reflect how important a word is to a
8
+ # document in a collection or corpus
9
+ #
10
+ # Cosine Similarity is a measurement to determine how similar 2 documents are to each other.
11
+ #
12
+ # These weights and measures are used to suggest which account a transaction should be
13
+ # assigned to.
6
14
  module Reckon
7
15
  class CosineSimilarity
16
+ DocumentInfo = Struct.new(:tokens, :accounts)
17
+
8
18
  def initialize(options)
19
+ @docs = DocumentInfo.new({}, {})
9
20
  @options = options
10
- @tokens = {}
11
- @accounts = Hash.new(0)
12
21
  end
13
22
 
14
23
  def add_document(account, doc)
15
- tokenize(doc).each do |n|
24
+ tokens = tokenize(doc)
25
+ LOGGER.info "doc tokens: #{tokens}"
26
+ tokens.each do |n|
16
27
  (token, count) = n
17
28
 
18
- @tokens[token] ||= {}
19
- @tokens[token][account] ||= 0
20
- @tokens[token][account] += count
21
- @accounts[account] += count
29
+ @docs.tokens[token] ||= Hash.new(0)
30
+ @docs.tokens[token][account] += count
31
+ @docs.accounts[account] ||= Hash.new(0)
32
+ @docs.accounts[account][token] += count
22
33
  end
23
34
  end
24
35
 
25
36
  # find most similar documents to query
26
37
  def find_similar(query)
27
- (query_scores, corpus_scores) = td_idf_scores_for(query)
38
+ LOGGER.info "find_similar #{query}"
28
39
 
29
- query_vector = Vector.elements(query_scores, false)
40
+ accounts = docs_to_check(query).map do |a|
41
+ [a, tfidf(@docs.accounts[a])]
42
+ end
30
43
 
31
- # For each doc, calculate the similarity to the query
32
- suggestions = corpus_scores.map do |account, scores|
33
- acct_vector = Vector.elements(scores, false)
44
+ q = tfidf(tokenize(query))
34
45
 
35
- acct_query_dp = acct_vector.inner_product(query_vector)
36
- # similarity is a float between 1 and -1, where 1 is exactly the same and -1 is
37
- # exactly opposite
38
- # see https://en.wikipedia.org/wiki/Cosine_similarity
39
- # cos(theta) = (A . B) / (||A|| ||B||)
40
- # where A . B is the "dot product" and ||A|| is the magnitude of A
41
- # ruby has the 'matrix' library we can use to do these calculations.
46
+ suggestions = accounts.map do |a, d|
42
47
  {
43
- similarity: acct_query_dp / (acct_vector.magnitude * query_vector.magnitude),
44
- account: account,
48
+ similarity: calc_similarity(q, d),
49
+ account: a
45
50
  }
46
51
  end.select { |n| n[:similarity] > 0 }.sort_by { |n| -n[:similarity] }
47
52
 
@@ -52,50 +57,51 @@ module Reckon
52
57
 
53
58
  private
54
59
 
55
- def td_idf_scores_for(query)
56
- query_tokens = tokenize(query)
57
- corpus = Set.new
58
- corpus_scores = {}
59
- query_scores = []
60
- num_docs = @accounts.length
61
-
62
- query_tokens.each do |n|
63
- (token, _count) = n
64
- next unless @tokens[token]
65
- corpus = corpus.union(Set.new(@tokens[token].keys))
60
+ def docs_to_check(query)
61
+ return tokenize(query).reduce(Set.new) do |corpus, t|
62
+ corpus.union(Set.new(@docs.tokens[t[0]]&.keys))
66
63
  end
64
+ end
67
65
 
68
- query_tokens.each do |n|
69
- (token, count) = n
70
-
71
- # if no other docs have token, ignore it
72
- next unless @tokens[token]
66
+ def tfidf(tokens)
67
+ scores = {}
73
68
 
74
- ## First, calculate scores for our query as we're building scores for the corpus
75
- query_scores << calc_tf_idf(
76
- count,
77
- query_tokens.length,
78
- @tokens[token].length,
79
- num_docs
69
+ tokens.each do |t, n|
70
+ scores[t] = calc_tf_idf(
71
+ n,
72
+ tokens.length,
73
+ @docs.tokens[t]&.length&.to_f || 0,
74
+ @docs.accounts.length
80
75
  )
81
-
82
- ## Next, calculate for the corpus, where our "account" is a document
83
- corpus.each do |account|
84
- corpus_scores[account] ||= []
85
-
86
- corpus_scores[account] << calc_tf_idf(
87
- (@tokens[token][account] || 0),
88
- @accounts[account].to_f,
89
- @tokens[token].length.to_f,
90
- num_docs
91
- )
92
- end
93
76
  end
94
- [query_scores, corpus_scores]
77
+
78
+ return scores
95
79
  end
96
80
 
97
- def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
81
+ # Cosine similarity is used to compare how similar 2 documents are. Returns a float
82
+ # between 1 and -1, where 1 is exactly the same and -1 is exactly opposite.
83
+ #
84
+ # see https://en.wikipedia.org/wiki/Cosine_similarity
85
+ # cos(theta) = (A . B) / (||A|| ||B||)
86
+ # where A . B is the "dot product" and ||A|| is the magnitude of A
87
+ #
88
+ # The variables A and B are the set of unique terms in q and d.
89
+ #
90
+ # For example, when q = "big red balloon" and d ="small green balloon" then the
91
+ # variables are (big,red,balloon,small,green) and a = (1,1,1,0,0) and b =
92
+ # (0,0,1,1,1).
93
+ #
94
+ # query and doc are hashes of token => tf/idf score
95
+ def calc_similarity(query, doc)
96
+ tokens = Set.new(query.keys + doc.keys)
97
+
98
+ a = Vector.elements(tokens.map { |n| query[n] || 0 }, false)
99
+ b = Vector.elements(tokens.map { |n| doc[n] || 0 }, false)
100
+
101
+ return a.inner_product(b) / (a.magnitude * b.magnitude)
102
+ end
98
103
 
104
+ def calc_tf_idf(token_count, num_words_in_doc, df, num_docs)
99
105
  # tf(t,d) = count of t in d / number of words in d
100
106
  tf = token_count / num_words_in_doc.to_f
101
107
 
@@ -109,14 +115,13 @@ module Reckon
109
115
  end
110
116
 
111
117
  def tokenize(str)
112
- mk_tokens(str).inject(Hash.new(0)) do |memo, n|
118
+ mk_tokens(str).each_with_object(Hash.new(0)) do |n, memo|
113
119
  memo[n] += 1
114
- memo
115
120
  end.to_a
116
121
  end
117
122
 
118
123
  def mk_tokens(str)
119
- str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/)
124
+ str.downcase.tr(';', ' ').tr("'", '').split(/[^a-z0-9.]+/).reject(&:empty?)
120
125
  end
121
126
  end
122
127
  end
data/lib/reckon/csv_parser.rb CHANGED
@@ -89,12 +89,7 @@ module Reckon
89
89
  money_score += Money::likelihood( entry )
90
90
  possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
91
91
  possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
92
- date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
93
- date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
94
- date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
95
- date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
96
- date_score += 30 if entry =~ /^\d+[:\/\.-]\d+[:\/\.-]\d+([ :]\d+[:\/\.]\d+)?$/
97
- date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
92
+ date_score += DateColumn.likelihood(entry)
98
93
 
99
94
  # Try to determine if this is a balance column
100
95
  entry_as_num = entry.gsub(/[^\-\d\.]/, '').to_f
@@ -168,7 +163,7 @@ module Reckon
168
163
  results = evaluate_columns(columns)
169
164
 
170
165
  if options[:money_column]
171
- self.money_column_indices = [ options[:money_column] - 1 ]
166
+ self.money_column_indices = [options[:money_column] - 1]
172
167
  else
173
168
  self.money_column_indices = results.select { |n| n[:is_money_column] }.map { |n| n[:index] }
174
169
  if self.money_column_indices.length == 1
data/lib/reckon/date_column.rb CHANGED
@@ -56,5 +56,15 @@ module Reckon
56
56
  date.iso8601
57
57
  end
58
58
 
59
+ def self.likelihood(entry)
60
+ date_score = 0
61
+ date_score += 10 if entry =~ /\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/i
62
+ date_score += 5 if entry =~ /^[\-\/\.\d:\[\]]+$/
63
+ date_score += entry.gsub(/[^\-\/\.\d:\[\]]/, '').length if entry.gsub(/[^\-\/\.\d:\[\]]/, '').length > 3
64
+ date_score -= entry.gsub(/[\-\/\.\d:\[\]]/, '').length
65
+ date_score += 30 if entry =~ /^\d+[:\/\.-]\d+[:\/\.-]\d+([ :]\d+[:\/\.]\d+)?$/
66
+ date_score += 10 if entry =~ /^\d+\[\d+:GMT\]$/i
67
+ return date_score
68
+ end
59
69
  end
60
70
  end
data/lib/reckon/money.rb CHANGED
@@ -5,12 +5,13 @@ module Reckon
5
5
  class Money
6
6
  include Comparable
7
7
  attr_accessor :amount, :currency, :suffixed
8
- def initialize( amount, options = {} )
9
- if options[:inverse]
10
- @amount = -1*amount.to_f
11
- else
12
- @amount = amount.to_f
13
- end
8
+ def initialize(amount, options = {})
9
+ @options = options
10
+ @amount_raw = amount
11
+ @raw = options[:raw]
12
+
13
+ @amount = parse(amount, options)
14
+ @amount = -@amount if options[:inverse]
14
15
  @currency = options[:currency] || "$"
15
16
  @suffixed = options[:suffixed]
16
17
  end
@@ -19,11 +20,19 @@ module Reckon
19
20
  return @amount
20
21
  end
21
22
 
23
+ def to_s
24
+ return @options[:raw] ? "#{@amount_raw} | #{@amount}" : @amount
25
+ end
26
+
27
+ # unary minus
28
+ # ex
29
+ # m = Money.new
30
+ # -m
22
31
  def -@
23
- Money.new( -@amount, :currency => @currency, :suffixed => @suffixed )
32
+ Money.new(-@amount, :currency => @currency, :suffixed => @suffixed)
24
33
  end
25
34
 
26
- def <=>( mon )
35
+ def <=>(mon)
27
36
  other_amount = mon.to_f
28
37
  if @amount < other_amount
29
38
  -1
@@ -34,42 +43,41 @@ module Reckon
34
43
  end
35
44
  end
36
45
 
37
- def pretty( negate = false )
38
- if @suffixed
39
- (@amount >= 0 ? " " : "") + sprintf("%0.2f #{@currency}", @amount * (negate ? -1 : 1))
40
- else
41
- (@amount >= 0 ? " " : "") + sprintf("%0.2f", @amount * (negate ? -1 : 1)).gsub(/^((\-)|)(?=\d)/, "\\1#{@currency}")
46
+ def pretty(negate = false)
47
+ if @raw
48
+ return @amount_raw unless negate
49
+
50
+ return @amount_raw[0] == '-' ? @amount_raw[1..-1] : "-#{@amount_raw}"
42
51
  end
52
+
53
+ amt = pretty_amount(@amount * (negate ? -1 : 1))
54
+ amt = if @suffixed
55
+ "#{amt} #{@currency}"
56
+ else
57
+ amt.gsub(/^((-)|)(?=\d)/, "\\1#{@currency}")
58
+ end
59
+
60
+ return (@amount >= 0 ? " " : "") + amt
61
+ end
62
+
63
+ def pretty_amount(amount)
64
+ sprintf("%0.2f", amount).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
43
65
  end
44
66
 
45
- def Money::from_s( value, options = {} )
67
+ def parse(value, options = {})
68
+ value = value.to_s
46
69
  # Empty string is treated as money with value 0
47
- return Money.new( 0.00, options ) if value.empty?
48
-
49
- # Remove 1000 separaters and replace , with . if comma_separates_cents
50
- # 1.000,00 -> 1000.00
51
- value = value.gsub(/\./, '').gsub(/,/, '.') if options[:comma_separates_cents]
52
- value = value.gsub(/,/, '')
53
-
54
- money_format_regex = /^(.*?)(\d+\.\d\d)/ # Money has two decimal precision
55
- any_number_regex = /^(.*?)([\d\.]+)/
56
-
57
- # Prefer matching the money_format, match any number otherwise
58
- m = value.match( money_format_regex ) ||
59
- value.match( any_number_regex )
60
- if m
61
- amount = m[2].to_f
62
- # Check whether the money had a - or (, which indicates negative amounts
63
- if (m[1].match( /^[\(-]/ ) || m[1].match( /-$/ ))
64
- amount *= -1
65
- end
66
- return Money.new( amount, options )
67
- else
68
- return nil
69
- end
70
+ return value.to_f if value.to_s.empty?
71
+
72
+ invert = value.match(/^\(.*\)$/)
73
+ value = value.gsub(/[^0-9,.-]/, '')
74
+ value = value.tr('.', '').tr(',', '.') if options[:comma_separates_cents]
75
+ value = value.tr(',', '')
76
+ value = value.to_f
77
+ return invert ? -value : value
70
78
  end
71
79
 
72
- def Money::likelihood( entry )
80
+ def Money::likelihood(entry)
73
81
  money_score = 0
74
82
  # digits separated by , or . with no more than 2 trailing digits
75
83
  money_score += 40 if entry.match(/\d+[,.]\d{2}[^\d]*$/)
@@ -83,31 +91,30 @@ module Reckon
83
91
  end
84
92
 
85
93
  class MoneyColumn < Array
86
- def initialize( arr = [], options = {} )
87
- arr.each { |str| self.push( Money.from_s( str, options ) ) }
94
+ def initialize(arr = [], options = {})
95
+ arr.each { |str| push(Money.new(str, options)) }
88
96
  end
89
97
 
90
98
  def positive?
91
- self.each do |money|
92
- return false if money < 0 if money
99
+ each do |money|
100
+ return false if money && money < 0
93
101
  end
94
102
  true
95
103
  end
96
104
 
97
- def merge!( other_column )
105
+ def merge!(other_column)
98
106
  invert = false
99
- invert = true if self.positive? && other_column.positive?
100
- self.each_with_index do |mon, i|
107
+ invert = true if positive? && other_column.positive?
108
+ each_with_index do |mon, i|
101
109
  other = other_column[i]
102
- return nil if (!mon || !other)
103
- if mon != 0.00 && other == 0.0
104
- if invert
105
- self[i]= -mon
106
- end
107
- elsif mon == 0.00 && other != 0.00
110
+ return nil if !mon || !other
111
+
112
+ if mon != 0.0 && other == 0.0
113
+ self[i] = -mon if invert
114
+ elsif mon == 0.0 && other != 0.0
108
115
  self[i] = other
109
116
  else
110
- return nil
117
+ self[i] = Money.new(0)
111
118
  end
112
119
  end
113
120
  self