reckon 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26126e1e4ead4fcd93a94093f8e4a4fd557a9c46ccc6983198d4bdd516e639ee
4
- data.tar.gz: f196a86ca58ebaeee4f27030bb21af9fd3c3001d20d06b74f2063ea42d96cfcc
3
+ metadata.gz: 03c20b48d4333969c8304a5bb9a3c01fc6053050ab9146329ce14ae6a9886b38
4
+ data.tar.gz: 27a2ce4e8db5c7818cc4cefb19f180a7c727190f0a990403f565fad503e749a9
5
5
  SHA512:
6
- metadata.gz: 02ad471caf5a5b6b69d98cde9cfa5e0579bbe680f50bb7fa8c1f5b3a7908018a48075531674a27d23139913be9156f889cac8010e06de87439c0665b064d7171
7
- data.tar.gz: a8abf375fab7ba91d31a0d05ee372a5fc788feca0fe6a61859cb4ed6c72a3387238a4df2d2dac2ce300fe4f047142ea18bbe03a364e008e2b9588143f0223852
6
+ metadata.gz: 2f569b3d5cf4038714065a6d184d6c07f57d10598e5efc610eeb9919e8b18c65aff5e5329ab89a9ed30f72cabce9d11f5645af4d0df3bda6d05ad9afd988f7e7
7
+ data.tar.gz: 1783a63ba138c2b87a0756d6b9bcfbce068daf977e582a4c920a37ff50358328f8514f308dbbf932ef5cc4111e9e52dadfaed5876b9d30f4759d4a1eb31299fa
@@ -9,7 +9,9 @@ name: Build Status
9
9
 
10
10
  on:
11
11
  push:
12
+ branches: [ master ]
12
13
  pull_request:
14
+ branches: [ master ]
13
15
 
14
16
  jobs:
15
17
  test:
data/.rubocop.yml ADDED
@@ -0,0 +1,20 @@
1
+ Layout/LineLength:
2
+ Max: 88
3
+
4
+ Style/StringLiterals:
5
+ Enabled: false
6
+
7
+ Style/RedundantReturn:
8
+ Enabled: false
9
+
10
+ Metrics/ClassLength:
11
+ Enabled: False
12
+
13
+ Metrics/MethodLength:
14
+ Enabled: False
15
+
16
+ Metrics/AbcSize:
17
+ Enabled: False
18
+
19
+ Style/NumericPredicate:
20
+ Enabled: False
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## [v0.9.1](https://github.com/cantino/reckon/tree/v0.9.1) (2023-03-19)
4
+
5
+ [Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0...v0.9.1)
6
+
7
+ **Closed issues:**
8
+
9
+ - More than one column support [\#120](https://github.com/cantino/reckon/issues/120)
10
+ - Beancount support [\#119](https://github.com/cantino/reckon/issues/119)
11
+ - Problem with importing CSV [\#60](https://github.com/cantino/reckon/issues/60)
12
+
3
13
  ## [v0.9.0](https://github.com/cantino/reckon/tree/v0.9.0) (2023-02-23)
4
14
 
5
15
  [Full Changelog](https://github.com/cantino/reckon/compare/v0.9.0-beta...v0.9.0)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- reckon (0.9.0)
4
+ reckon (0.9.1)
5
5
  chronic (>= 0.3.0)
6
6
  highline (>= 1.5.2)
7
7
  matrix (>= 0.4.2)
data/Rakefile CHANGED
@@ -13,10 +13,10 @@ task :test_all do
13
13
  puts "Running unit tests"
14
14
  Rake::Task["spec"].invoke
15
15
  puts "Running integration tests"
16
- Rake::Task["integration_tests"].invoke
16
+ Rake::Task["test_integration"].invoke
17
17
  end
18
18
 
19
- task :integration_tests do
19
+ task :test_integration do
20
20
  cmd = 'prove -v ./spec/integration/test.sh'
21
21
  raise 'Integration tests failed' unless system(cmd)
22
22
  end
@@ -1,6 +1,6 @@
1
1
  #!/bin/bash
2
2
 
3
- set -e
3
+ set -xe
4
4
 
5
5
  VERSION=$1
6
6
 
@@ -8,7 +8,7 @@ echo "Install github_changelog_generator"
8
8
  gem install --user github_changelog_generator
9
9
 
10
10
  echo "Update 'lib/reckon/version.rb'"
11
- echo -e "module Reckon\n VERSION=\"$VERSION\"\nend" > lib/reckon/version.rb
11
+ echo -e "module Reckon\n VERSION = \"$VERSION\"\nend" > lib/reckon/version.rb
12
12
  echo "Run `bundle install` to build updated Gemfile.lock"
13
13
  bundle install
14
14
  echo "Run changelog generator (requires $TOKEN to be your github token)"
@@ -24,3 +24,4 @@ echo "Push changes and tags"
24
24
  echo "git push && git push --tags"
25
25
  echo "Push new gem"
26
26
  echo "gem push reckon-$VERSION.gem"
27
+ gh release create v$VERSION reckon-$VERSION.gem --draft --generate-notes
data/bin/reckon CHANGED
@@ -4,7 +4,7 @@ require 'rubygems'
4
4
  require 'reckon'
5
5
 
6
6
  begin
7
- options = Reckon::Options.parse
7
+ options = Reckon::Options.parse_command_line_options
8
8
  rescue RuntimeError => e
9
9
  puts("ERROR: #{e}")
10
10
  exit(1)
data/lib/reckon/app.rb CHANGED
@@ -1,12 +1,12 @@
1
- # coding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
- require 'pp'
4
3
  require 'yaml'
4
+ require 'stringio'
5
5
 
6
6
  module Reckon
7
+ # The main app
7
8
  class App
8
9
  attr_accessor :options, :seen, :csv_parser, :regexps, :matcher
9
- @@cli = HighLine.new
10
10
 
11
11
  def initialize(opts = {})
12
12
  self.options = opts
@@ -14,9 +14,10 @@ module Reckon
14
14
 
15
15
  self.regexps = {}
16
16
  self.seen = Set.new
17
- self.options[:currency] ||= '$'
18
- @csv_parser = CSVParser.new( options )
17
+ @cli = HighLine.new
18
+ @csv_parser = CSVParser.new(options)
19
19
  @matcher = CosineSimilarity.new(options)
20
+ @parser = options[:format] =~ /beancount/i ? BeancountParser.new : LedgerParser.new
20
21
  learn!
21
22
  end
22
23
 
@@ -26,9 +27,13 @@ module Reckon
26
27
  fh.puts str
27
28
  end
28
29
 
30
+ # Learn from previous transactions. Used to recommend accounts for a transaction.
29
31
  def learn!
30
32
  learn_from_account_tokens(options[:account_tokens_file])
31
33
  learn_from_ledger_file(options[:existing_ledger_file])
34
+ # TODO: make this work
35
+ # this doesn't work because output_file is an IO object
36
+ # learn_from_ledger_file(options[:output_file]) if File.exist?(options[:output_file])
32
37
  end
33
38
 
34
39
  def learn_from_account_tokens(filename)
@@ -52,12 +57,13 @@ module Reckon
52
57
 
53
58
  raise "#{ledger_file} doesn't exist!" unless File.exist?(ledger_file)
54
59
 
55
- learn_from_ledger(File.read(ledger_file))
60
+ learn_from_ledger(File.new(ledger_file))
56
61
  end
57
62
 
63
+ # Takes an IO-like object
58
64
  def learn_from_ledger(ledger)
59
65
  LOGGER.info "learning from #{ledger}"
60
- LedgerParser.new(ledger).entries.each do |entry|
66
+ @parser.parse(ledger).each do |entry|
61
67
  entry[:accounts].each do |account|
62
68
  str = [entry[:desc], account[:amount]].join(" ")
63
69
  if account[:name] != options[:bank_account]
@@ -84,7 +90,7 @@ module Reckon
84
90
  merged_acct = [account, k].compact.join(':')
85
91
  extract_account_tokens(v, merged_acct)
86
92
  end
87
- at.inject({}) { |memo, e| memo.merge!(e)}
93
+ at.inject({}) { |memo, e| memo.merge!(e) }
88
94
  end
89
95
  end
90
96
 
@@ -92,6 +98,7 @@ module Reckon
92
98
  # https://github.com/tenderlove/psych/blob/master/lib/psych/visitors/to_ruby.rb
93
99
  match = regex_str.match(/^\/(.*)\/([ix]*)$/m)
94
100
  fail "failed to parse regexp #{regex_str}" unless match
101
+
95
102
  options = 0
96
103
  (match[2] || '').split('').each do |option|
97
104
  case option
@@ -120,13 +127,16 @@ module Reckon
120
127
 
121
128
  if row[:money] > 0
122
129
  # out_of_account
123
- answer = ask_account_question("Which account provided this income? (#{cmd_options})", row)
130
+ answer = ask_account_question(
131
+ "Which account provided this income? (#{cmd_options})", row
132
+ )
124
133
  line1 = [options[:bank_account], row[:pretty_money]]
125
134
  line2 = [answer, ""]
126
135
  else
127
136
  # into_account
128
- answer = ask_account_question("To which account did this money go? (#{cmd_options})", row)
129
- # line1 = [answer, row[:pretty_money_negated]]
137
+ answer = ask_account_question(
138
+ "To which account did this money go? (#{cmd_options})", row
139
+ )
130
140
  line1 = [answer, ""]
131
141
  line2 = [options[:bank_account], row[:pretty_money]]
132
142
  end
@@ -137,9 +147,9 @@ module Reckon
137
147
  next
138
148
  end
139
149
 
140
- ledger = ledger_format(row, line1, line2)
150
+ ledger = @parser.format_row(row, line1, line2)
141
151
  LOGGER.info "ledger line: #{ledger}"
142
- learn_from_ledger(ledger) unless options[:account_tokens_file]
152
+ learn_from_ledger(StringIO.new(ledger)) unless options[:account_tokens_file]
143
153
  output(ledger)
144
154
  end
145
155
  end
@@ -203,7 +213,7 @@ module Reckon
203
213
  return possible_answers[0] || default
204
214
  end
205
215
 
206
- answer = @@cli.ask(msg) do |q|
216
+ answer = @cli.ask(msg) do |q|
207
217
  q.completion = possible_answers
208
218
  q.readline = true
209
219
  q.default = possible_answers.first
@@ -221,7 +231,7 @@ module Reckon
221
231
  end
222
232
 
223
233
  def add_description(row)
224
- desc_answer = @@cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
234
+ desc_answer = @cli.ask("Enter a new description for this transaction (empty line aborts)\n") do |q|
225
235
  q.overwrite = true
226
236
  q.readline = true
227
237
  q.default = row[:description]
@@ -231,7 +241,7 @@ module Reckon
231
241
  end
232
242
 
233
243
  def add_note(row)
234
- desc_answer = @@cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
244
+ desc_answer = @cli.ask("Enter a new note for this transaction (empty line aborts)\n") do |q|
235
245
  q.overwrite = true
236
246
  q.readline = true
237
247
  q.default = row[:note]
@@ -246,7 +256,7 @@ module Reckon
246
256
  [account, match[0]]
247
257
  end
248
258
  }.compact
249
- matches.sort_by! { |_account, matched_text| matched_text.length }.map(&:first)
259
+ matches.sort_by { |_account, matched_text| matched_text.length }.map(&:first)
250
260
  end
251
261
 
252
262
  def suggest(row)
@@ -254,13 +264,6 @@ module Reckon
254
264
  @matcher.find_similar(row[:description]).map { |n| n[:account] }
255
265
  end
256
266
 
257
- def ledger_format(row, line1, line2)
258
- out = "#{row[:pretty_date]}\t#{row[:description]}#{row[:note] ? "\t; " + row[:note]: ""}\n"
259
- out += "\t#{line1.first}\t\t\t#{line1.last}\n"
260
- out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
261
- out
262
- end
263
-
264
267
  def output(ledger_line)
265
268
  options[:output_file].puts ledger_line
266
269
  options[:output_file].flush
@@ -0,0 +1,150 @@
1
+ require 'rubygems'
2
+ require 'date'
3
+
4
+ module Reckon
5
+ class BeancountParser
6
+
7
+ attr_accessor :entries
8
+
9
+ def initialize(options = {})
10
+ @options = options
11
+ @date_format = options[:ledger_date_format] || options[:date_format] || '%Y-%m-%d'
12
+ end
13
+
14
+ # 2015-01-01 * "Opening Balance for checking account"
15
+ # Assets:US:BofA:Checking 3490.52 USD
16
+ # Equity:Opening-Balances -3490.52 USD
17
+
18
+ # input is an object that responds to #each_line,
19
+ # (i.e. a StringIO or an IO object)
20
+ def parse(input)
21
+ entries = []
22
+ comment_chars = ';#%*|'
23
+ new_entry = {}
24
+
25
+ input.each_line do |entry|
26
+
27
+ next if entry =~ /^\s*[#{comment_chars}]/
28
+
29
+ m = entry.match(%r{
30
+ ^
31
+ (\d+[\d/-]+) # date
32
+ \s+
33
+ ([*!])? # type
34
+ \s*
35
+ ("[^"]*")? # description (optional)
36
+ \s*
37
+ ("[^"]*")? # notes (optional)
38
+ # tags (not implemented)
39
+ }x)
40
+
41
+ # (date, type, description, notes); everything except the date is optional
42
+ if (m)
43
+ add_entry(entries, new_entry)
44
+ new_entry = {
45
+ date: try_parse_date(m[1]),
46
+ type: m[2] || "",
47
+ desc: trim_quote(m[3]),
48
+ notes: trim_quote(m[4]),
49
+ accounts: []
50
+ }
51
+ elsif entry =~ /^\s*$/ && new_entry[:date]
52
+ add_entry(entries, new_entry)
53
+ new_entry = {}
54
+ elsif new_entry[:date] && entry =~ /^\s+/
55
+ LOGGER.info("Adding new account #{entry}")
56
+ new_entry[:accounts] << parse_account_line(entry)
57
+ else
58
+ LOGGER.info("Unknown entry type: #{entry}")
59
+ add_entry(entries, new_entry)
60
+ new_entry = {}
61
+ end
62
+
63
+ end
64
+ entries
65
+ end
66
+
67
+ def format_row(row, line1, line2)
68
+ out = %Q{#{row[:pretty_date]} * "#{row[:description]}" "#{row[:note]}\n}
69
+ out += "\t#{line1.first}\t\t\t#{line1.last}\n"
70
+ out += "\t#{line2.first}\t\t\t#{line2.last}\n\n"
71
+ out
72
+ end
73
+
74
+ private
75
+
76
+ # remove leading and trailing quote character (")
77
+ def trim_quote(str)
78
+ return str if !str
79
+ str.gsub(/^"([^"]*)"$/, '\1')
80
+ end
81
+
82
+ def add_entry(entries, entry)
83
+ return unless entry[:date] && entry[:accounts].length > 1
84
+
85
+ entry[:accounts] = balance(entry[:accounts])
86
+ entries << entry
87
+ end
88
+
89
+ def try_parse_date(date_str)
90
+ date = Date.parse(date_str)
91
+ return nil if date.year > 9999 || date.year < 1000
92
+
93
+ date
94
+ rescue ArgumentError
95
+ nil
96
+ end
97
+
98
+ def parse_account_line(entry)
99
+ # TODO handle buying stocks
100
+ # Assets:US:ETrade:VHT 19 VHT {132.32 USD, 2017-08-27}
101
+ (account_name, rest) = entry.strip.split(/\s{2,}|\t+/, 2)
102
+
103
+ if rest.nil? || rest.empty?
104
+ return {
105
+ name: account_name,
106
+ amount: clean_money("")
107
+ }
108
+ end
109
+
110
+ value = if rest =~ /{/
111
+ (qty, dollar_value, date) = rest.split(/[{,]/)
112
+ (qty.to_f * dollar_value.to_f).to_s
113
+ else
114
+ rest
115
+ end
116
+
117
+ return {
118
+ name: account_name,
119
+ amount: clean_money(value || "")
120
+ }
121
+ end
122
+
123
+ def balance(accounts)
124
+ return accounts unless accounts.any? { |i| i[:amount].nil? }
125
+
126
+ sum = accounts.reduce(0) { |m, n| m + (n[:amount] || 0) }
127
+ count = 0
128
+ accounts.each do |account|
129
+ next unless account[:amount].nil?
130
+
131
+ count += 1
132
+ account[:amount] = -sum
133
+ end
134
+ if count > 1
135
+ puts "Warning: unparsable entry due to more than one missing money value."
136
+ p accounts
137
+ puts
138
+ end
139
+
140
+ accounts
141
+ end
142
+
143
+ def clean_money(money)
144
+ return nil if money.nil? || money.empty?
145
+
146
+ money.gsub(/[^0-9.-]/, '').to_f
147
+ end
148
+ end
149
+ end
150
+
@@ -17,7 +17,6 @@ module Reckon
17
17
 
18
18
  def initialize(options)
19
19
  @docs = DocumentInfo.new({}, {})
20
- @options = options
21
20
  end
22
21
 
23
22
  def add_document(account, doc)
@@ -1,32 +1,28 @@
1
- #coding: utf-8
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
2
4
 
3
5
  module Reckon
6
+ # Parses CSV files
4
7
  class CSVParser
5
- attr_accessor :options, :csv_data, :money_column_indices, :date_column_index, :description_column_indices, :money_column, :date_column
8
+ attr_accessor :options, :csv_data, :money_column_indices, :date_column_index,
9
+ :description_column_indices, :money_column, :date_column
6
10
 
7
11
  def initialize(options = {})
8
12
  self.options = options
13
+
14
+ self.options[:csv_separator] = "\t" if options[:csv_separator] == '\t'
9
15
  self.options[:currency] ||= '$'
16
+
17
+ # we convert to a string so we can do character encoding cleanup
10
18
  @csv_data = parse(options[:string] || File.read(options[:file]), options[:file])
11
19
  filter_csv
12
20
  detect_columns
13
21
  end
14
22
 
23
+ # transpose csv_data (array of rows) to an array of columns
15
24
  def columns
16
- @columns ||=
17
- begin
18
- last_row_length = nil
19
- csv_data.inject([]) do |memo, row|
20
- unless row.all? { |i| i.nil? || i.length == 0 }
21
- row.each_with_index do |entry, index|
22
- memo[index] ||= []
23
- memo[index] << (entry || '').strip
24
- end
25
- last_row_length = row.length
26
- end
27
- memo
28
- end
29
- end
25
+ @columns ||= @csv_data[0].zip(*@csv_data[1..])
30
26
  end
31
27
 
32
28
  def date_for(index)
@@ -34,7 +30,7 @@ module Reckon
34
30
  end
35
31
 
36
32
  def pretty_date_for(index)
37
- @date_column.pretty_for( index )
33
+ @date_column.pretty_for(index)
38
34
  end
39
35
 
40
36
  def money_for(index)
@@ -42,7 +38,7 @@ module Reckon
42
38
  end
43
39
 
44
40
  def pretty_money(amount, negate = false)
45
- Money.new( amount, @options ).pretty( negate )
41
+ Money.new(amount, @options).pretty(negate)
46
42
  end
47
43
 
48
44
  def pretty_money_for(index, negate = false)
@@ -54,11 +50,11 @@ module Reckon
54
50
 
55
51
  def description_for(index)
56
52
  description_column_indices.map { |i| columns[i][index].to_s.strip }
57
- .reject(&:empty?)
58
- .join("; ")
59
- .squeeze(" ")
60
- .gsub(/(;\s+){2,}/, '')
61
- .strip
53
+ .reject(&:empty?)
54
+ .join("; ")
55
+ .squeeze(" ")
56
+ .gsub(/(;\s+){2,}/, '')
57
+ .strip
62
58
  end
63
59
 
64
60
  def row(index)
@@ -84,9 +80,10 @@ module Reckon
84
80
  money_score = date_score = possible_neg_money_count = possible_pos_money_count = 0
85
81
  last = nil
86
82
  column.reverse.each_with_index do |entry, row_from_bottom|
83
+ entry ||= "" # entries can be nil
87
84
  row = csv_data[csv_data.length - 1 - row_from_bottom]
88
85
  entry = entry.strip
89
- money_score += Money::likelihood( entry )
86
+ money_score += Money::likelihood(entry)
90
87
  possible_neg_money_count += 1 if entry =~ /^\$?[\-\(]\$?\d+/
91
88
  possible_pos_money_count += 1 if entry =~ /^\+?\$?\+?\d+/
92
89
  date_score += DateColumn.likelihood(entry)
@@ -97,8 +94,8 @@ module Reckon
97
94
  row.each do |row_entry|
98
95
  row_entry = row_entry.to_s.gsub(/[^\-\d\.]/, '').to_f
99
96
  if row_entry != 0 && last + row_entry == entry_as_num
100
- money_score -= 10
101
- break
97
+ money_score -= 10
98
+ break
102
99
  end
103
100
  end
104
101
  end
@@ -110,7 +107,8 @@ module Reckon
110
107
  found_likely_money_column = true
111
108
  end
112
109
 
113
- results << { :index => index, :money_score => money_score, :date_score => date_score }
110
+ results << { :index => index, :money_score => money_score,
111
+ :date_score => date_score }
114
112
  end
115
113
 
116
114
  results.sort_by! { |n| -n[:money_score] }
@@ -129,14 +127,15 @@ module Reckon
129
127
  # Some csv files negative/positive amounts are indicated in separate account
130
128
  def detect_sign_column
131
129
  return if columns[0].length <= 2 # This test requires more than two rows; otherwise it will lead to false positives
130
+
132
131
  signs = []
133
132
  if @money_column_indices[0] > 0
134
- column = columns[ @money_column_indices[0] - 1 ]
133
+ column = columns[@money_column_indices[0] - 1]
135
134
  signs = column.uniq
136
135
  end
137
136
  if (signs.length != 2 &&
138
137
  (@money_column_indices[0] + 1 < columns.length))
139
- column = columns[ @money_column_indices[0] + 1 ]
138
+ column = columns[@money_column_indices[0] + 1]
140
139
  signs = column.uniq
141
140
  end
142
141
  if signs.length == 2
@@ -166,15 +165,19 @@ module Reckon
166
165
  self.money_column_indices = [options[:money_column] - 1]
167
166
  elsif options[:money_columns].length == 2
168
167
  in_col, out_col = options[:money_columns]
169
- self.money_column_indices = [in_col -1, out_col -1]
168
+ self.money_column_indices = [in_col - 1, out_col - 1]
170
169
  else
171
170
  puts "Unable to determine money columns, use --money-columns to specify the 1 or 2 column(s) reckon should use."
172
171
  end
173
172
 
174
173
  # If no money_column(s) argument is supplied, try to automatically infer money_column(s)
175
174
  else
176
- self.money_column_indices = results.select { |n| n[:is_money_column] }.map { |n| n[:index] }
175
+ self.money_column_indices = results.select { |n|
176
+ n[:is_money_column]
177
+ }.map { |n| n[:index] }
177
178
  if self.money_column_indices.length == 1
179
+ # TODO: print the unfiltered column number, not the filtered
180
+ # ie if money column is 7, but we ignore columns 4 and 5, this prints "Using column 5 as the money column"
178
181
  puts "Using column #{money_column_indices.first + 1} as the money column. Use --money-colum to specify a different one."
179
182
  elsif self.money_column_indices.length == 2
180
183
  puts "Using columns #{money_column_indices[0] + 1} and #{money_column_indices[1] + 1} as money column. Use --money-columns to specify different ones."
@@ -204,20 +207,53 @@ module Reckon
204
207
  self.description_column_indices = results.map { |i| i[:index] }
205
208
  end
206
209
 
207
- def parse(data, filename=nil)
210
+ def parse(data, filename = nil)
208
211
  # Use force_encoding to convert the string to utf-8 with as few invalid characters
209
212
  # as possible.
210
213
  data.force_encoding(try_encoding(data, filename))
211
214
  data = data.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
212
215
  data.sub!("\xEF\xBB\xBF", '') # strip byte order marker, if it exists
213
216
 
214
- rows = []
215
- data.each_line.with_index do |line, i|
216
- next if i < (options[:contains_header] || 0)
217
- rows << CSV.parse_line(line, col_sep: options[:csv_separator] || ',')
217
+ separator = options[:csv_separator] || guess_column_separator(data)
218
+ header_lines_to_skip = options[:contains_header] || 0
219
+ # a range end of -1 skips 0 footer rows, hence the + 1 below
220
+ footer_lines_to_skip = (options[:contains_footer] || 0) + 1
221
+
222
+ # convert to a stringio object to handle multi-line fields
223
+ parser_opts = {
224
+ col_sep: separator,
225
+ skip_blanks: true
226
+ }
227
+ begin
228
+ rows = CSV.parse(StringIO.new(data), **parser_opts)
229
+ rows[header_lines_to_skip..-footer_lines_to_skip]
230
+ rescue CSV::MalformedCSVError
231
+ # try removing N header lines before parsing
232
+ index = 0
233
+ count = 0
234
+ while count < header_lines_to_skip
235
+ index = data.index("\n", index) + 1 # skip over newline character
236
+ count += 1
237
+ end
238
+ rows = CSV.parse(StringIO.new(data[index..-1]), **parser_opts)
239
+ rows[0..-footer_lines_to_skip]
240
+ end
241
+ end
242
+
243
+ def guess_column_separator(data)
244
+ delimiters = [',', "\t", ';', ':', '|']
245
+
246
+ counts = [0] * delimiters.length
247
+
248
+ data.each_line do |line|
249
+ delimiters.each_with_index do |delim, i|
250
+ counts[i] += line.count(delim)
251
+ end
218
252
  end
219
253
 
220
- rows
254
+ LOGGER.info("guessing #{delimiters[counts.index(counts.max)]} as csv separator")
255
+
256
+ delimiters[counts.index(counts.max)]
221
257
  end
222
258
 
223
259
  def try_encoding(data, filename = nil)