comma_splice 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +16 -0
- data/.github/workflows/ruby.yml +21 -0
- data/.gitignore +2 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +32 -33
- data/README.md +9 -0
- data/bin/comma_splice +9 -4
- data/comma_splice.gemspec +1 -1
- data/lib/comma_splice/file_corrector.rb +9 -8
- data/lib/comma_splice/helpers/comma_calculator.rb +18 -9
- data/lib/comma_splice/helpers/content_finder.rb +6 -6
- data/lib/comma_splice/helpers/line.rb +5 -3
- data/lib/comma_splice/helpers/option_scorer.rb +11 -10
- data/lib/comma_splice/helpers/variable_column_finder.rb +7 -7
- data/lib/comma_splice/line_corrector.rb +20 -10
- data/lib/comma_splice/version.rb +1 -1
- metadata +10 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67cde27bb8ea56782dfe614bb99c593c121c369c01730e211aa7e058cbc22251
|
4
|
+
data.tar.gz: f015c933560a42374d201422780d693ee31ac43394a9d587aa80f582037d647e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e34800ecbdfd7454b83aa6faa34ec48aa72468428df32c11176319ae44cd1b50b6c06195072ad5bec15968394830eb51209795ed4861406468a6384f0ec7f665
|
7
|
+
data.tar.gz: cf77db07813250f91e0dd1e73bd0b11740ffa2fb50b00e5f4bd9ec3e3023c89c34c73afb31d735c00297f04135448c5ca6ff26dcdb00243b2b6bcc1fb5e1fc6f
|
@@ -0,0 +1,16 @@
|
|
1
|
+
version: 2
|
2
|
+
updates:
|
3
|
+
- package-ecosystem: bundler
|
4
|
+
directory: "/"
|
5
|
+
schedule:
|
6
|
+
interval: daily
|
7
|
+
time: "11:00"
|
8
|
+
open-pull-requests-limit: 10
|
9
|
+
ignore:
|
10
|
+
- dependency-name: activesupport
|
11
|
+
versions:
|
12
|
+
- ">= 6.a, < 7"
|
13
|
+
- dependency-name: activesupport
|
14
|
+
versions:
|
15
|
+
- 5.2.4.4
|
16
|
+
- 5.2.4.5
|
@@ -0,0 +1,21 @@
|
|
1
|
+
name: Ruby
|
2
|
+
on: [push, pull_request]
|
3
|
+
jobs:
|
4
|
+
test:
|
5
|
+
strategy:
|
6
|
+
fail-fast: false
|
7
|
+
matrix:
|
8
|
+
os: [ ubuntu-latest, macos-latest ]
|
9
|
+
ruby: [ '2.5', '2.6', '2.7', '3.0', '3.1' ]
|
10
|
+
runs-on: ${{ matrix.os }}
|
11
|
+
steps:
|
12
|
+
- uses: actions/checkout@v3
|
13
|
+
- name: Set up Ruby
|
14
|
+
uses: ruby/setup-ruby@v1
|
15
|
+
with:
|
16
|
+
ruby-version: ${{ matrix.ruby }}
|
17
|
+
- name: Build and test with Rake
|
18
|
+
run: |
|
19
|
+
gem install bundler
|
20
|
+
bundle install --jobs 4 --retry 3
|
21
|
+
bundle exec rake
|
data/.gitignore
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,9 @@
|
|
1
1
|
# Changelog
|
2
|
+
### 0.3.0 (November 3, 2022)
|
3
|
+
- [IMPROVEMENT] Add support for using a separator besides comma, through the `separator: ` argument. Defaults to comma if not supplied.
|
4
|
+
|
5
|
+
### 0.2.3 (January 28, 2020)
|
6
|
+
- [BUGFIX] Fix another scoring issue
|
2
7
|
|
3
8
|
### 0.2.2 (January 27, 2020)
|
4
9
|
- [BUGFIX] Fix another scoring issue
|
data/Gemfile.lock
CHANGED
@@ -1,47 +1,46 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
comma_splice (0.2.
|
4
|
+
comma_splice (0.2.3)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
-
activesupport (
|
9
|
+
activesupport (7.0.4)
|
10
10
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
11
|
-
i18n (>=
|
12
|
-
minitest (
|
13
|
-
tzinfo (~>
|
14
|
-
byebug (11.
|
15
|
-
concurrent-ruby (1.1.
|
16
|
-
diff-lcs (1.
|
17
|
-
docile (1.
|
18
|
-
i18n (1.
|
11
|
+
i18n (>= 1.6, < 2)
|
12
|
+
minitest (>= 5.1)
|
13
|
+
tzinfo (~> 2.0)
|
14
|
+
byebug (11.1.3)
|
15
|
+
concurrent-ruby (1.1.10)
|
16
|
+
diff-lcs (1.5.0)
|
17
|
+
docile (1.4.0)
|
18
|
+
i18n (1.12.0)
|
19
19
|
concurrent-ruby (~> 1.0)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
rspec-
|
25
|
-
rspec-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
rspec-expectations (3.8.4)
|
20
|
+
minitest (5.16.3)
|
21
|
+
rake (13.0.6)
|
22
|
+
rspec (3.12.0)
|
23
|
+
rspec-core (~> 3.12.0)
|
24
|
+
rspec-expectations (~> 3.12.0)
|
25
|
+
rspec-mocks (~> 3.12.0)
|
26
|
+
rspec-core (3.12.0)
|
27
|
+
rspec-support (~> 3.12.0)
|
28
|
+
rspec-expectations (3.12.0)
|
30
29
|
diff-lcs (>= 1.2.0, < 2.0)
|
31
|
-
rspec-support (~> 3.
|
32
|
-
rspec-mocks (3.
|
30
|
+
rspec-support (~> 3.12.0)
|
31
|
+
rspec-mocks (3.12.0)
|
33
32
|
diff-lcs (>= 1.2.0, < 2.0)
|
34
|
-
rspec-support (~> 3.
|
35
|
-
rspec-support (3.
|
36
|
-
simplecov (0.
|
33
|
+
rspec-support (~> 3.12.0)
|
34
|
+
rspec-support (3.12.0)
|
35
|
+
simplecov (0.21.2)
|
37
36
|
docile (~> 1.1)
|
38
|
-
|
39
|
-
|
40
|
-
simplecov-html (0.
|
41
|
-
|
42
|
-
|
43
|
-
tzinfo (
|
44
|
-
|
37
|
+
simplecov-html (~> 0.11)
|
38
|
+
simplecov_json_formatter (~> 0.1)
|
39
|
+
simplecov-html (0.12.3)
|
40
|
+
simplecov_json_formatter (0.1.4)
|
41
|
+
thor (1.2.1)
|
42
|
+
tzinfo (2.0.5)
|
43
|
+
concurrent-ruby (~> 1.0)
|
45
44
|
|
46
45
|
PLATFORMS
|
47
46
|
ruby
|
@@ -51,7 +50,7 @@ DEPENDENCIES
|
|
51
50
|
bundler (~> 2.0)
|
52
51
|
byebug
|
53
52
|
comma_splice!
|
54
|
-
rake (~>
|
53
|
+
rake (~> 13.0)
|
55
54
|
rspec
|
56
55
|
simplecov
|
57
56
|
thor
|
data/README.md
CHANGED
@@ -87,6 +87,9 @@ You can use this in a ruby program by using installing the `comma_splice` gem, o
|
|
87
87
|
|
88
88
|
```ruby
|
89
89
|
CommaSplice::FileCorrector.new(file_path).bad_lines.size
|
90
|
+
|
91
|
+
#you can specify another separator
|
92
|
+
CommaSplice::FileCorrector.new(file_path, separator: ';').bad_lines.size
|
90
93
|
```
|
91
94
|
```
|
92
95
|
comma_splice bad_line_count /path/to/file.csv
|
@@ -95,6 +98,9 @@ You can use this in a ruby program by using installing the `comma_splice` gem, o
|
|
95
98
|
##### Display the fixed contents
|
96
99
|
```ruby
|
97
100
|
CommaSplice::FileCorrector.new(file_path).corrected
|
101
|
+
|
102
|
+
#you can specify another separator
|
103
|
+
CommaSplice::FileCorrector.new(file_path, separator: ';').corrected
|
98
104
|
```
|
99
105
|
```bash
|
100
106
|
comma_splice correct /path/to/file.csv
|
@@ -103,6 +109,9 @@ You can use this in a ruby program by using installing the `comma_splice` gem, o
|
|
103
109
|
##### Process a file and save the fixed version
|
104
110
|
```ruby
|
105
111
|
CommaSplice::FileCorrector.new(file_path).save(save_path)
|
112
|
+
|
113
|
+
#you can specify another separator
|
114
|
+
CommaSplice::FileCorrector.new(file_path, separator: ';').save(save_path)
|
106
115
|
```
|
107
116
|
```bash
|
108
117
|
comma_splice fix /path/to/file.csv /path/to/save
|
data/bin/comma_splice
CHANGED
@@ -8,6 +8,7 @@ class CommaSpliceCLI < Thor
|
|
8
8
|
class_option :start_line, type: :numeric, default: nil
|
9
9
|
class_option :end_line, type: :numeric, default: nil
|
10
10
|
class_option :debug, type: :boolean, default: false
|
11
|
+
class_option :separator, type: :string, default: ','
|
11
12
|
|
12
13
|
desc 'version', 'print the current comma_splice version'
|
13
14
|
def version
|
@@ -21,7 +22,8 @@ class CommaSpliceCLI < Thor
|
|
21
22
|
file_corrector = CommaSplice::FileCorrector.new(
|
22
23
|
file_path,
|
23
24
|
start_line: options[:start_line],
|
24
|
-
end_line: options[:end_line]
|
25
|
+
end_line: options[:end_line],
|
26
|
+
separator: options[:separator]
|
25
27
|
)
|
26
28
|
|
27
29
|
puts file_corrector.corrected
|
@@ -34,7 +36,8 @@ class CommaSpliceCLI < Thor
|
|
34
36
|
file_corrector = CommaSplice::FileCorrector.new(
|
35
37
|
file_path,
|
36
38
|
start_line: options[:start_line],
|
37
|
-
end_line: options[:end_line]
|
39
|
+
end_line: options[:end_line],
|
40
|
+
separator: options[:separator]
|
38
41
|
)
|
39
42
|
|
40
43
|
file_corrector.save(fix_path)
|
@@ -47,7 +50,8 @@ class CommaSpliceCLI < Thor
|
|
47
50
|
file_corrector = CommaSplice::FileCorrector.new(
|
48
51
|
file_path,
|
49
52
|
start_line: options[:start_line],
|
50
|
-
end_line: options[:end_line]
|
53
|
+
end_line: options[:end_line],
|
54
|
+
separator: options[:separator]
|
51
55
|
)
|
52
56
|
|
53
57
|
puts file_corrector.bad_lines
|
@@ -60,7 +64,8 @@ class CommaSpliceCLI < Thor
|
|
60
64
|
file_corrector = CommaSplice::FileCorrector.new(
|
61
65
|
file_path,
|
62
66
|
start_line: options[:start_line],
|
63
|
-
end_line: options[:end_line]
|
67
|
+
end_line: options[:end_line],
|
68
|
+
separator: options[:separator]
|
64
69
|
)
|
65
70
|
|
66
71
|
puts file_corrector.bad_lines.size
|
data/comma_splice.gemspec
CHANGED
@@ -38,7 +38,7 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.require_paths = ["lib"]
|
39
39
|
|
40
40
|
spec.add_development_dependency "bundler", "~> 2.0"
|
41
|
-
spec.add_development_dependency "rake", "~>
|
41
|
+
spec.add_development_dependency "rake", "~> 13.0"
|
42
42
|
spec.add_development_dependency "rspec"
|
43
43
|
spec.add_development_dependency "byebug"
|
44
44
|
spec.add_development_dependency "activesupport"
|
@@ -1,21 +1,22 @@
|
|
1
1
|
module CommaSplice
|
2
2
|
class FileCorrector
|
3
|
-
attr_reader :file_contents, :csv_content, :start_line, :end_line, :start_column, :end_column
|
3
|
+
attr_reader :file_contents, :csv_content, :start_line, :end_line, :start_column, :end_column, :separator
|
4
4
|
|
5
|
-
def initialize(file_path, start_line: nil, end_line:nil, start_column: nil, end_column: nil)
|
5
|
+
def initialize(file_path, start_line: nil, end_line: nil, start_column: nil, end_column: nil, separator: ',')
|
6
6
|
@file_path = file_path
|
7
7
|
@file_contents = File.read(file_path, encoding: 'utf-8')
|
8
|
+
@separator = separator
|
8
9
|
|
9
|
-
|
10
|
+
|
11
|
+
@content_finder = ContentFinder.new(@file_contents, start_line, end_line, separator)
|
10
12
|
@csv_content = @content_finder.content
|
11
13
|
@start_line = @content_finder.start_line
|
12
14
|
@end_line = @content_finder.end_line
|
13
|
-
|
14
15
|
if start_column && end_column
|
15
16
|
@start_column = start_column
|
16
17
|
@end_column = end_column
|
17
18
|
else
|
18
|
-
finder = VariableColumnFinder.new(@csv_content[0], @csv_content[1..-1])
|
19
|
+
finder = VariableColumnFinder.new(@csv_content[0], @csv_content[1..-1], @separator)
|
19
20
|
@start_column = finder.start_column
|
20
21
|
@end_column = finder.end_column
|
21
22
|
end
|
@@ -24,7 +25,7 @@ module CommaSplice
|
|
24
25
|
end
|
25
26
|
|
26
27
|
def header
|
27
|
-
@header ||= Line.new(csv_content.first)
|
28
|
+
@header ||= Line.new(csv_content.first, @separator)
|
28
29
|
end
|
29
30
|
|
30
31
|
def bad_lines
|
@@ -60,7 +61,7 @@ module CommaSplice
|
|
60
61
|
end
|
61
62
|
end
|
62
63
|
|
63
|
-
def to_json
|
64
|
+
def to_json(*_args)
|
64
65
|
@content_finder.parsed.try(:to_json)
|
65
66
|
end
|
66
67
|
|
@@ -68,7 +69,7 @@ module CommaSplice
|
|
68
69
|
|
69
70
|
def line_correctors
|
70
71
|
@line_correctors ||= csv_content.collect do |line|
|
71
|
-
LineCorrector.new(header, Line.new(line), @start_column, @end_column)
|
72
|
+
LineCorrector.new(header, Line.new(line, @separator), @start_column, @end_column, @separator)
|
72
73
|
end
|
73
74
|
end
|
74
75
|
|
@@ -1,13 +1,18 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module CommaSplice
|
3
4
|
# provide an array of CSV headers and and array of CSV values
|
4
5
|
# and this will figure out the best correction and prompt
|
5
6
|
# you if it can't find out
|
6
7
|
|
7
8
|
class CommaCalculator
|
8
|
-
def initialize(headers, values)
|
9
|
-
|
9
|
+
def initialize(headers, values, separator = ',')
|
10
|
+
if headers.size > 10 && values.size > 10
|
11
|
+
raise StandardError,
|
12
|
+
"Determining all the possibilities to fit #{values.size} values into the #{headers.size} headers #{headers.inspect} is computationally expensive. Please specify the columns where commas might be."
|
13
|
+
end
|
10
14
|
|
15
|
+
@separator = separator
|
11
16
|
@headers = headers
|
12
17
|
@values = values
|
13
18
|
@longest_header = @headers.max_by(&:length)
|
@@ -41,12 +46,12 @@ module CommaSplice
|
|
41
46
|
end
|
42
47
|
|
43
48
|
@ranked_options ||= @all_options.collect do |option|
|
44
|
-
OptionScorer.new(option)
|
49
|
+
OptionScorer.new(option, separator: @separator)
|
45
50
|
end
|
46
51
|
end
|
47
52
|
|
48
53
|
def score_option(option)
|
49
|
-
OptionScorer.new(option).score
|
54
|
+
OptionScorer.new(option, separator: @separator).score
|
50
55
|
end
|
51
56
|
|
52
57
|
def best_options
|
@@ -62,7 +67,13 @@ module CommaSplice
|
|
62
67
|
@headers.size < @values.size
|
63
68
|
end
|
64
69
|
|
65
|
-
|
70
|
+
def print_all_options
|
71
|
+
ranked_options.each_with_index do |option, index|
|
72
|
+
print_option(option, index)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
protected
|
66
77
|
|
67
78
|
def join_possibilities
|
68
79
|
JoinPossibilities.new(@values.size, @headers.size).possibilities
|
@@ -73,7 +84,7 @@ module CommaSplice
|
|
73
84
|
print_option(option, index)
|
74
85
|
end
|
75
86
|
|
76
|
-
puts
|
87
|
+
puts 'press 0 to see all options' if ranked_options.size != options.size
|
77
88
|
|
78
89
|
selected_option = nil
|
79
90
|
until selected_option && selected_option.to_i > -1
|
@@ -103,9 +114,7 @@ module CommaSplice
|
|
103
114
|
header.ljust(@longest_header.size) + ': ' +
|
104
115
|
option.option[i].to_s.ljust(75)
|
105
116
|
|
106
|
-
if CommaSplice.debug
|
107
|
-
line = line + "| " + (score_breakdown.shift || "")
|
108
|
-
end
|
117
|
+
line = line + '| ' + (score_breakdown.shift || '') if CommaSplice.debug
|
109
118
|
|
110
119
|
lines << line
|
111
120
|
end
|
@@ -4,11 +4,11 @@ module CommaSplice
|
|
4
4
|
# Given a file this will find the CSV content. Some files have some non-csv junk at the top
|
5
5
|
|
6
6
|
class ContentFinder
|
7
|
-
attr_reader :start_line, :end_line, :content
|
7
|
+
attr_reader :start_line, :end_line, :content, :separator
|
8
8
|
|
9
|
-
def initialize(file_contents, start_line = nil, end_line = nil)
|
9
|
+
def initialize(file_contents, start_line = nil, end_line = nil, separator = ',')
|
10
10
|
@file_contents = file_contents
|
11
|
-
|
11
|
+
@separator = separator
|
12
12
|
if start_line && end_line
|
13
13
|
# the csvs this was built for have non-csv headers
|
14
14
|
@start_line = start_line
|
@@ -21,11 +21,11 @@ module CommaSplice
|
|
21
21
|
|
22
22
|
def find_content
|
23
23
|
@start_line = @file_contents.lines.find_index do |line|
|
24
|
-
Line.new(line).values.size > 2
|
24
|
+
Line.new(line, separator).values.size > 2
|
25
25
|
end
|
26
26
|
|
27
27
|
relative_end_line = @file_contents.lines[@start_line..-1].find_index do |line|
|
28
|
-
Line.new(line).values.size < 2
|
28
|
+
Line.new(line, separator).values.size < 2
|
29
29
|
end
|
30
30
|
|
31
31
|
@end_line = if relative_end_line
|
@@ -40,7 +40,7 @@ module CommaSplice
|
|
40
40
|
def parsed
|
41
41
|
quote_chars = %w[" | ~ ^ & *]
|
42
42
|
begin
|
43
|
-
CSV.parse(@content.join(
|
43
|
+
CSV.parse(@content.join('\n'), col_sep: separator, quote_char: quote_chars.shift, headers: :first_row, liberal_parsing: true)
|
44
44
|
rescue CSV::MalformedCSVError
|
45
45
|
quote_chars.empty? ? raise : retry
|
46
46
|
end
|
@@ -1,9 +1,10 @@
|
|
1
1
|
module CommaSplice
|
2
2
|
class Line
|
3
|
-
attr_reader :values, :line
|
3
|
+
attr_reader :values, :line, :separator
|
4
4
|
|
5
|
-
def initialize(line)
|
5
|
+
def initialize(line, separator)
|
6
6
|
@line = line
|
7
|
+
@separator = separator
|
7
8
|
@values = parse_csv_content(line).first
|
8
9
|
end
|
9
10
|
|
@@ -12,7 +13,8 @@ module CommaSplice
|
|
12
13
|
def parse_csv_content(content, headers = false)
|
13
14
|
quote_chars = %w[" | ~ ^ & *]
|
14
15
|
begin
|
15
|
-
CSV.parse(content.mb_chars.tidy_bytes.to_s, quote_char: quote_chars.shift,
|
16
|
+
CSV.parse(content.mb_chars.tidy_bytes.to_s, col_sep: @separator, quote_char: quote_chars.shift,
|
17
|
+
headers:, liberal_parsing: true)
|
16
18
|
rescue CSV::MalformedCSVError
|
17
19
|
quote_chars.empty? ? raise : retry
|
18
20
|
end
|
@@ -3,9 +3,10 @@ module CommaSplice
|
|
3
3
|
class OptionScorer
|
4
4
|
attr_reader :option
|
5
5
|
|
6
|
-
def initialize(option)
|
6
|
+
def initialize(option, separator: ',')
|
7
7
|
@option = option
|
8
8
|
@start_score = 100
|
9
|
+
@separator = separator
|
9
10
|
end
|
10
11
|
|
11
12
|
def breakdown
|
@@ -15,9 +16,7 @@ module CommaSplice
|
|
15
16
|
rules.each do |rule|
|
16
17
|
rule_score = send(rule.to_sym)
|
17
18
|
score += rule_score
|
18
|
-
if rule_score != 0
|
19
|
-
breakdown << "#{rule_score.to_s.ljust(3)} #{rule.to_sym}"
|
20
|
-
end
|
19
|
+
breakdown << "#{rule_score.to_s.ljust(3)} #{rule.to_sym}" if rule_score != 0
|
21
20
|
end
|
22
21
|
|
23
22
|
breakdown.unshift("score: #{score}")
|
@@ -43,21 +42,21 @@ module CommaSplice
|
|
43
42
|
end.size * -1
|
44
43
|
end
|
45
44
|
|
46
|
-
def
|
45
|
+
def options_that_start_with_a_separator
|
47
46
|
option.select do |o|
|
48
|
-
o.to_s.starts_with?(
|
47
|
+
o.to_s.starts_with?(@separator)
|
49
48
|
end.size * -5
|
50
49
|
end
|
51
50
|
|
52
|
-
def
|
51
|
+
def options_that_end_with_a_separator
|
53
52
|
option.select do |o|
|
54
|
-
o.to_s.ends_with?(
|
53
|
+
o.to_s.ends_with?(@separator)
|
55
54
|
end.size * -5
|
56
55
|
end
|
57
56
|
|
58
|
-
def
|
57
|
+
def options_that_have_words_joined_by_separators
|
59
58
|
option.select do |o|
|
60
|
-
|
59
|
+
Regexp.new("[^0-9\\s]#{@separator}\\w").match(o.to_s) || Regexp.new("\\w#{@separator}[^0-9\\s]").match(o.to_s)
|
61
60
|
end.compact.size * -5
|
62
61
|
end
|
63
62
|
|
@@ -68,6 +67,8 @@ module CommaSplice
|
|
68
67
|
end
|
69
68
|
|
70
69
|
def options_that_have_longest_comma_separated_number
|
70
|
+
# return 0 unless @separator == ','
|
71
|
+
|
71
72
|
# favor items that have a longer comma separated number
|
72
73
|
# i.e in the following example, option 1 should win
|
73
74
|
# (1) artist : Half Japanese
|
@@ -18,12 +18,12 @@ module CommaSplice
|
|
18
18
|
# 17385094,,,01-27-2019 @ 13:47:00,KIng Tubby Meets The Upsetter,King And The Upsetter At Spanish Town,KIng Tubby Meets The Upsetter,Celluloid,post,live,y,
|
19
19
|
|
20
20
|
class VariableColumnFinder
|
21
|
-
attr_reader :start_column, :end_column
|
21
|
+
attr_reader :start_column, :end_column, :separator
|
22
22
|
|
23
|
-
def initialize(header_line, value_lines)
|
23
|
+
def initialize(header_line, value_lines, separator = ',')
|
24
24
|
@values = value_lines
|
25
25
|
@header = header_line
|
26
|
-
|
26
|
+
@separator = separator
|
27
27
|
find_variable_column_boundaries
|
28
28
|
end
|
29
29
|
|
@@ -44,9 +44,9 @@ module CommaSplice
|
|
44
44
|
|
45
45
|
def left_to_right_index
|
46
46
|
left_to_right_index = []
|
47
|
-
@header.split(
|
47
|
+
@header.split(@separator).size.times do |time|
|
48
48
|
left_to_right_index.push(@values.map do |value_line|
|
49
|
-
value_line.split(
|
49
|
+
value_line.split(@separator)[time].to_s.size
|
50
50
|
end.uniq.size == 1)
|
51
51
|
end
|
52
52
|
|
@@ -55,9 +55,9 @@ module CommaSplice
|
|
55
55
|
|
56
56
|
def right_to_left_index
|
57
57
|
right_to_left_index = []
|
58
|
-
@header.split(
|
58
|
+
@header.split(@separator).size.times do |time|
|
59
59
|
right_to_left_index.unshift(@values.map do |value_line|
|
60
|
-
value_line.split(
|
60
|
+
value_line.split(@separator)[-time].to_s.size
|
61
61
|
end.uniq.size == 1)
|
62
62
|
end
|
63
63
|
|
@@ -1,10 +1,10 @@
|
|
1
1
|
module CommaSplice
|
2
2
|
class LineCorrector
|
3
|
-
attr_reader :headers, :values, :header_line, :value_line, :right_bounds, :left_bounds
|
3
|
+
attr_reader :headers, :values, :header_line, :value_line, :right_bounds, :left_bounds, :separator
|
4
4
|
|
5
|
-
def initialize(header_line, value_line, left_bounds = 0, right_bounds = -1)
|
6
|
-
header_line = Line.new(header_line) unless header_line.is_a?(Line)
|
7
|
-
value_line = Line.new(value_line) unless value_line.is_a?(Line)
|
5
|
+
def initialize(header_line, value_line, left_bounds = 0, right_bounds = -1, separator = ',')
|
6
|
+
header_line = Line.new(header_line, separator) unless header_line.is_a?(Line)
|
7
|
+
value_line = Line.new(value_line, separator) unless value_line.is_a?(Line)
|
8
8
|
|
9
9
|
@header_line = header_line
|
10
10
|
@value_line = value_line
|
@@ -12,13 +12,14 @@ module CommaSplice
|
|
12
12
|
@values = value_line.values
|
13
13
|
@left_bounds = left_bounds
|
14
14
|
@right_bounds = right_bounds
|
15
|
+
@separator = separator
|
15
16
|
|
16
17
|
raise 'right bounds must be negative' unless right_bounds.negative?
|
17
18
|
raise 'left bounds must be not be negative' if left_bounds.negative?
|
18
19
|
end
|
19
20
|
|
20
21
|
def needs_correcting?
|
21
|
-
@values
|
22
|
+
@values&.size&.positive? && @headers.size != @values.size
|
22
23
|
end
|
23
24
|
|
24
25
|
def needs_manual_input?
|
@@ -26,9 +27,14 @@ module CommaSplice
|
|
26
27
|
end
|
27
28
|
|
28
29
|
def option_count
|
30
|
+
puts corrector.best_options
|
29
31
|
corrector.best_options.size
|
30
32
|
end
|
31
33
|
|
34
|
+
def all_options
|
35
|
+
corrector.ranked_options
|
36
|
+
end
|
37
|
+
|
32
38
|
def original
|
33
39
|
generate_csv_line(@values)
|
34
40
|
end
|
@@ -48,14 +54,18 @@ module CommaSplice
|
|
48
54
|
generate_csv_line([values_before, corrector.correction, values_after].flatten)
|
49
55
|
end
|
50
56
|
|
51
|
-
|
52
|
-
|
53
|
-
def generate_csv_line(values)
|
54
|
-
CSV.generate_line(values)
|
57
|
+
def print_all_options
|
58
|
+
corrector.print_all_options
|
55
59
|
end
|
56
60
|
|
61
|
+
protected
|
62
|
+
|
57
63
|
def corrector
|
58
|
-
CommaCalculator.new(selected_headers, selected_values)
|
64
|
+
CommaCalculator.new(selected_headers, selected_values, @separator)
|
65
|
+
end
|
66
|
+
|
67
|
+
def generate_csv_line(values)
|
68
|
+
CSV.generate_line(values, col_sep: @separator)
|
59
69
|
end
|
60
70
|
|
61
71
|
def selected_headers
|
data/lib/comma_splice/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: comma_splice
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jeff Keen
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '13.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '13.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rspec
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -102,6 +102,8 @@ executables:
|
|
102
102
|
extensions: []
|
103
103
|
extra_rdoc_files: []
|
104
104
|
files:
|
105
|
+
- ".github/dependabot.yml"
|
106
|
+
- ".github/workflows/ruby.yml"
|
105
107
|
- ".gitignore"
|
106
108
|
- ".rspec"
|
107
109
|
- CHANGELOG.md
|
@@ -129,7 +131,7 @@ licenses:
|
|
129
131
|
- MIT
|
130
132
|
metadata:
|
131
133
|
allowed_push_host: https://rubygems.org
|
132
|
-
post_install_message:
|
134
|
+
post_install_message:
|
133
135
|
rdoc_options: []
|
134
136
|
require_paths:
|
135
137
|
- lib
|
@@ -144,8 +146,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
144
146
|
- !ruby/object:Gem::Version
|
145
147
|
version: '0'
|
146
148
|
requirements: []
|
147
|
-
rubygems_version: 3.
|
148
|
-
signing_key:
|
149
|
+
rubygems_version: 3.3.7
|
150
|
+
signing_key:
|
149
151
|
specification_version: 4
|
150
152
|
summary: Fixes CSVs with unescaped commas
|
151
153
|
test_files: []
|