csv_separator_detector 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3fb3aafd4be7410864b3da814faf00fcdbfab984
4
+ data.tar.gz: c25146c943306ee463a6068e111637579fdf44b3
5
+ SHA512:
6
+ metadata.gz: 32111c5a1b813bf7a25c9a61dce58b82cb8f830dd0dfabbe64dc5268b40e8c387f7c5ea831e03081553b95784680308cf7d5cfdd68bcfeb7c54cfd2f503f1ec4
7
+ data.tar.gz: a36d8bef9a4ebe54d2ccce9432a191c7f427482a78457ba785e790531545d95c03ce3791b4fc3c1a206cd0ad7534d5e3866ea193d621ae9223700d2b05c3cce9
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.4
5
+ - 2.4.0
6
+ before_install: gem install bundler -v 1.13.7
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in csv_separator_detector.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # CsvSeparatorDetector
2
+
3
+ ![travis](https://travis-ci.org/stulentsev/csv_separator_detector.svg?branch=master)
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'csv_separator_detector'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install csv_separator_detector
20
+
21
+ ## Usage
22
+
23
+ ``` ruby
24
+ begin
25
+ content = File.read('/path/to/csv.file')
26
+ separator = CsvSeparatorDetector.new(content).call # => ';'
27
+ rescue CsvSeparatorDetector::Error
28
+ # we couldn't reliably determine separator.
29
+ end
30
+ ```
31
+
32
+ ## Explanation
33
+
34
+ This class knows a few common CSV delimiters and tries to parse the given content using each of them
35
+ (by passing each delimiter as `col_sep` option to `CSV` from stdlib). Then it tries to make
36
+ sense of the result. All rows turned out to be just one column wide? That's likely because
37
+ we used a comma separator and the file is tab-delimited.
38
+
39
+ ## Contributing
40
+
41
+ Bug reports and pull requests are welcome on GitHub at https://github.com/stulentsev/csv_separator_detector.
42
+
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "csv_separator_detector"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'csv_separator_detector/version'
5
+
6
# Gem specification: declares the package identity, which files are shipped,
# and the development-only dependencies. No runtime dependencies are declared.
+ Gem::Specification.new do |spec|
7
+ spec.name = "csv_separator_detector"
8
# The version is read from the CsvSeparatorDetector::VERSION constant.
+ spec.version = CsvSeparatorDetector::VERSION
9
+ spec.authors = ["Sergio Tulentsev"]
10
+ spec.email = ["sergei.tulentsev@gmail.com"]
11
+
12
+ spec.summary = %q{Detect separators in CSV content}
13
+ spec.homepage = "https://github.com/stulentsev/csv_separator_detector"
14
+
15
# Ship every git-tracked file except those under test/, spec/ or features/.
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
16
+ f.match(%r{^(test|spec|features)/})
17
+ end
18
+ spec.bindir = "exe"
19
# Executables are the basenames of the packaged files located under exe/.
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
+ spec.require_paths = ["lib"]
21
+
22
# Tooling needed only while developing the gem.
+ spec.add_development_dependency "bundler", "~> 1.13"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency "rspec", "~> 3.0"
25
+ end
@@ -0,0 +1,49 @@
1
+ require "csv_separator_detector/version"
2
+ require 'csv'
3
+
4
# Guesses which column separator a piece of CSV text uses.
#
# The first line of the text is parsed once per supported separator (passed as
# +col_sep+ to the stdlib CSV parser); the separator that yields the most
# columns wins, provided it is the only one reaching that column count.
#
#   CsvSeparatorDetector.new("a;b;c\n1;2;3\n").call # => ";"
class CsvSeparatorDetector
  # Raised by #call when no single separator can be chosen.
  class Error < StandardError; end

  # @return [String] the CSV text given to the constructor
  attr_reader :csv_text

  # @param csv_text [String] raw CSV content to inspect
  def initialize(csv_text)
    @csv_text = csv_text
  end

  # Detects the separator used by #csv_text.
  #
  # @return [String] one of #supported_separators
  # @raise [CsvSeparatorDetector::Error] when the text is empty, when it cannot
  #   be parsed with any supported separator, or when two or more separators
  #   tie for the highest column count
  def call
    separator_with_most_columns || raise(CsvSeparatorDetector::Error)
  end

  # @return [Array<String>] the candidate separators, tried in this order
  def supported_separators
    [',', ';', "\t", '|']
  end

  private

  # Picks the separator that produced the highest column count.
  #
  # When two separators are equally good (produce the same amount of columns),
  # neither of them is returned.
  #
  # @return [String, nil] the winning separator, or nil when there is no
  #   unambiguous winner or no separator could parse the text at all
  def separator_with_most_columns
    counts = count_columns_for_separators
    # Nothing was parseable (empty or malformed text): report "no winner"
    # instead of calling a method on the nil that max_by would return.
    return if counts.empty?

    top_separators = counts.max_by(&:first).last
    top_separators.first if top_separators.length == 1
  end

  # Groups the supported separators by the number of columns each produces.
  #
  # Separators for which the text is malformed, or for which there is no line
  # to parse, are left out of the result.
  #
  # @return [Hash{Integer => Array<String>}] for example
  #   { 2 => [',', ';'], 1 => ["\t", '|'] }
  def count_columns_for_separators
    supported_separators.each_with_object({}) do |sep, memo|
      # Explicit begin/end keeps this loadable on Rubies that do not allow a
      # bare rescue inside a do/end block (the CI matrix includes 2.3).
      begin
        columns = ::CSV.parse_line(csv_text, col_sep: sep)
        # parse_line returns nil when there is no line to parse (empty text).
        next if columns.nil?

        (memo[columns.length] ||= []) << sep
      rescue ::CSV::MalformedCSVError
        # This separator cannot parse the text; it is simply not a candidate.
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
class CsvSeparatorDetector
  # Gem version string; frozen so the shared constant cannot be mutated in place.
  VERSION = "0.1.0".freeze
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: csv_separator_detector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Sergio Tulentsev
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-04-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description:
56
+ email:
57
+ - sergei.tulentsev@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - ".travis.yml"
65
+ - Gemfile
66
+ - README.md
67
+ - Rakefile
68
+ - bin/console
69
+ - bin/setup
70
+ - csv_separator_detector.gemspec
71
+ - lib/csv_separator_detector.rb
72
+ - lib/csv_separator_detector/version.rb
73
+ homepage: https://github.com/stulentsev/csv_separator_detector
74
+ licenses: []
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubyforge_project:
92
+ rubygems_version: 2.6.11
93
+ signing_key:
94
+ specification_version: 4
95
+ summary: Detect separators in CSV content
96
+ test_files: []