heuristic-csv 0.0.2.alpha

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in heuristic-csv.gemspec
4
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ heuristic-csv (0.0.2.alpha)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rspec (2.6.0)
11
+ rspec-core (~> 2.6.0)
12
+ rspec-expectations (~> 2.6.0)
13
+ rspec-mocks (~> 2.6.0)
14
+ rspec-core (2.6.4)
15
+ rspec-expectations (2.6.0)
16
+ diff-lcs (~> 1.1.2)
17
+ rspec-mocks (2.6.0)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ heuristic-csv!
24
+ rspec (~> 2.6.0)
data/README.md ADDED
@@ -0,0 +1,5 @@
1
+ HeuristicCsv
2
+ =============
3
+
4
+ The standard CSV library raises an exception when you try to read malformed CSV data.
5
+ Sometimes you need to read the data anyway, as long as it is still readable. This gem adds methods to the standard CSV library that try to overcome the CSV::MalformedCSVError exception by using regular expressions to repair the malformed data before parsing it again.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task default: :spec
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/heuristic-csv/version', __FILE__)
3
+
4
# Gem specification for heuristic-csv. File lists are taken from the files
# tracked by git, so it must be evaluated inside the git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Andrea Mostosi"]
  gem.email         = ["andrea.mostosi@zenkay.net"]
  gem.description   = %q{Try to overcome the exception CSV::MalformedCSVError and read the data anyway}
  gem.summary       = %q{CSV extension}
  gem.homepage      = "https://github.com/zenkay/heuristic-csv"

  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  # macOS Finder metadata (.DS_Store) is tracked in this repository; keep it
  # out of the packaged gem instead of shipping binary junk to users.
  gem.files         = `git ls-files`.split("\n").reject { |f| File.basename(f) == ".DS_Store" }
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.name          = "heuristic-csv"
  gem.require_paths = ["lib"]
  gem.version       = HeuristicCsv::VERSION

  gem.add_development_dependency "rspec", "~> 2.6.0"
end
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,30 @@
1
+ require "heuristic-csv/version"
2
+
3
# Extends the standard CSV class with a parser that tries to recover from
# malformed input instead of failing on the first CSV::MalformedCSVError.
class CSV
  # Parses +data+ like CSV.parse. If the parser raises MalformedCSVError, the
  # misplaced quotes are stripped from a copy of the text and it is parsed
  # once more; a failure of that second attempt is propagated to the caller.
  #
  # The input is parsed completely before any row is yielded, so rows that
  # precede a malformed line are never yielded twice.
  #
  # data    - String with the CSV text. The +$stdout+ default is kept only so
  #           the signature stays unchanged; pass the text explicitly.
  # options - Hash of CSV.parse options (e.g. +col_sep+); nil means none.
  # block   - optional; called once per parsed row.
  #
  # Returns the parsed rows (whatever CSV.parse returns without a block).
  # Raises MalformedCSVError when the data cannot be repaired; any other
  # error raised by CSV.parse is not intercepted.
  def self.heuristic_parse(data = $stdout, options = Hash.new, &block)
    options ||= {}
    rows =
      begin
        # CSV.parse takes keyword options on current Rubies, so splat the hash.
        parse(data, **options)
      rescue MalformedCSVError
        # Only in-memory text can be repaired and parsed a second time.
        raise unless data.is_a?(String)

        parse(remove_misplaced_quotes(data, options), **options)
      end
    rows.each(&block) if block
    rows
  end

  # Strategy 1: find quoted fields that contain stray quote characters and
  # remove those inner quotes.
  #
  # A field counts as broken when, between its opening and closing quote and
  # without crossing a separator or line break, at least one more quote
  # appears. The closing quote must be followed by a separator, a line break
  # or the end of the data.
  #
  # Returns a repaired copy; +data+ itself is left untouched (it may be frozen).
  def self.remove_misplaced_quotes(data, options)
    sep = (options && options[:col_sep]) || DEFAULT_OPTIONS[:col_sep]
    # Escape the separator so characters such as "]" or "\" stay literal
    # inside the character classes below.
    stop = "\\r\\n#{Regexp.escape(sep)}"
    broken_field = /"([^#{stop}]*"[^#{stop}]*)"(?=[#{stop}]|\z)/

    data.scan(broken_field).flatten.uniq.reduce(data.dup) do |fixed, field|
      fixed.gsub(field, field.delete('"'))
    end
  end
  # A bare `private` does not apply to `def self.` methods, so hide the
  # helper explicitly.
  private_class_method :remove_misplaced_quotes
end
@@ -0,0 +1,3 @@
1
# Namespace holding the heuristic-csv gem's version information.
module HeuristicCsv
  # Version string of the gem; frozen so it cannot be modified at runtime.
  VERSION = "0.0.2.alpha".freeze
end
data/spec/.DS_Store ADDED
Binary file
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
describe HeuristicCsv do

  # Collects every row yielded by CSV.heuristic_parse. Asserting on the
  # collected rows (instead of inside the block) makes an example fail when
  # no row is yielded at all, rather than pass without checking anything.
  def parsed_rows(data, options = {})
    rows = []
    CSV.heuristic_parse(data, options) { |row| rows << row }
    rows
  end

  describe "Heuristic behavior" do

    it "should parse unquoted CSV strings" do
      data = "argument1,argument2,argument3"
      parsed_rows(data).should eq([%w[argument1 argument2 argument3]])
    end

    it "should parse unquoted CSV strings with different col_sep" do
      data = "argument1;argument2;argument3"
      parsed_rows(data, {col_sep: ";"}).should eq([%w[argument1 argument2 argument3]])
    end

    it "should parse quoted CSV strings" do
      data = '"argument1","argument2","argument3"'
      parsed_rows(data).should eq([%w[argument1 argument2 argument3]])
    end

    it "should parse quoted CSV strings with different col_sep" do
      data = '"argument1";"argument2";"argument3"'
      parsed_rows(data, {col_sep: ";"}).should eq([%w[argument1 argument2 argument3]])
    end

    # The stray quotes are removed, so the repaired field reads "argumWRONGent2".
    it "should parse CSV strings with misplaced quotes" do
      data = '"argument1","argum"WRONG"ent2","argument3"'
      parsed_rows(data).should eq([%w[argument1 argumWRONGent2 argument3]])
    end

    it "should parse CSV strings with misplaced quotes with different col_sep" do
      data = '"argument1";"argum"WRONG"ent2";"argument3"'
      parsed_rows(data, {col_sep: ";"}).should eq([%w[argument1 argumWRONGent2 argument3]])
    end

  end

end
@@ -0,0 +1,2 @@
1
+ require 'csv'
2
+ require 'heuristic-csv'
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: heuristic-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2.alpha
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Andrea Mostosi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70290990509180 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.6.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70290990509180
25
+ description: Try to overcome the exception CSV::MalformedCSVError and read the data
26
+ anyway
27
+ email:
28
+ - andrea.mostosi@zenkay.net
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - Gemfile.lock
36
+ - README.md
37
+ - Rakefile
38
+ - heuristic-csv.gemspec
39
+ - lib/.DS_Store
40
+ - lib/heuristic-csv.rb
41
+ - lib/heuristic-csv/version.rb
42
+ - spec/.DS_Store
43
+ - spec/heuristic-csv_spec.rb
44
+ - spec/spec_helper.rb
45
+ homepage: https://github.com/zenkay/heuristic-csv
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>'
61
+ - !ruby/object:Gem::Version
62
+ version: 1.3.1
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.10
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: CSV extension
69
+ test_files:
70
+ - spec/heuristic-csv_spec.rb
71
+ - spec/spec_helper.rb