heuristic-csv 0.0.2.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +24 -0
- data/README.md +5 -0
- data/Rakefile +6 -0
- data/heuristic-csv.gemspec +19 -0
- data/lib/.DS_Store +0 -0
- data/lib/heuristic-csv.rb +30 -0
- data/lib/heuristic-csv/version.rb +3 -0
- data/spec/.DS_Store +0 -0
- data/spec/heuristic-csv_spec.rb +51 -0
- data/spec/spec_helper.rb +2 -0
- metadata +71 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
heuristic-csv (0.0.2.alpha)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: http://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.1.3)
|
10
|
+
rspec (2.6.0)
|
11
|
+
rspec-core (~> 2.6.0)
|
12
|
+
rspec-expectations (~> 2.6.0)
|
13
|
+
rspec-mocks (~> 2.6.0)
|
14
|
+
rspec-core (2.6.4)
|
15
|
+
rspec-expectations (2.6.0)
|
16
|
+
diff-lcs (~> 1.1.2)
|
17
|
+
rspec-mocks (2.6.0)
|
18
|
+
|
19
|
+
PLATFORMS
|
20
|
+
ruby
|
21
|
+
|
22
|
+
DEPENDENCIES
|
23
|
+
heuristic-csv!
|
24
|
+
rspec (~> 2.6.0)
|
data/README.md
ADDED
@@ -0,0 +1,5 @@
|
|
1
|
+
HeuristicCsv
|
2
|
+
=============
|
3
|
+
|
4
|
+
The standard CSV library raises an exception when you try to read malformed CSV data.
|
5
|
+
Sometimes you need to read the data anyway. This gem adds methods to the standard CSV library that try to recover from the CSV::MalformedCSVError exception by using regular expressions to repair the malformed data and then reading it again.
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/heuristic-csv/version', __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for heuristic-csv. File lists are taken from the files
# tracked by git at build time.
Gem::Specification.new do |spec|
  # Authorship and descriptive metadata.
  spec.authors     = ["Andrea Mostosi"]
  spec.email       = ["andrea.mostosi@zenkay.net"]
  spec.description = "Try to overcome the exception CSV::MalformedCSVError and read the data anyway"
  spec.summary     = "CSV extension"
  spec.homepage    = "https://github.com/zenkay/heuristic-csv"

  # Executables are the basenames of the git-tracked entries matching bin/*.
  tracked_bin_paths = `git ls-files -- bin/*`.split("\n")
  spec.executables  = tracked_bin_paths.map { |path| File.basename(path) }

  # Packaged files and test files, one path per line of `git ls-files` output.
  spec.files      = `git ls-files`.split("\n")
  spec.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")

  # Identity, load path and version (the version constant is loaded by the
  # require at the top of this file).
  spec.name          = "heuristic-csv"
  spec.require_paths = ["lib"]
  spec.version       = HeuristicCsv::VERSION

  spec.add_development_dependency "rspec", "~> 2.6.0"
end
|
data/lib/.DS_Store
ADDED
Binary file
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require "heuristic-csv/version"
|
2
|
+
|
3
|
+
# Extends the standard CSV class with a tolerant parser that retries once,
# after stripping misplaced double quotes, when the input is rejected as
# malformed. The csv library must already be loaded when this class is reopened.
class CSV
  # Parses +data+ like CSV.parse and, if the input raises
  # CSV::MalformedCSVError, repairs it with remove_misplaced_quotes and parses
  # it a second time.
  #
  # All rows are parsed before any is yielded, so a block never receives the
  # rows preceding a malformed line twice.
  #
  # @param data [String] CSV text. The default of $stdout is kept only for
  #   signature compatibility; the repair step works on Strings only.
  # @param options [Hash, nil] options forwarded to CSV.parse (e.g. :col_sep);
  #   nil is treated as no options.
  # @yield [row] each parsed row, in order, when a block is given.
  # @return the parsed rows, exactly as CSV.parse returns them without a block.
  # @raise [CSV::MalformedCSVError] if the repaired data is still malformed.
  def self.heuristic_parse(data = $stdout, options = Hash.new, &block)
    options ||= {}

    rows =
      begin
        # Options are splatted as keywords: CSV.parse on Ruby 3 no longer
        # accepts them as a positional hash.
        parse(data, **options)
      rescue MalformedCSVError
        # Only malformed input triggers the heuristic; other errors propagate.
        parse(remove_misplaced_quotes(data, options), **options)
      end

    rows.each(&block) if block_given?
    rows
  end

  # Strategy 1: find quoted fields that contain stray double quotes and strip
  # those inner quotes.
  #
  # A field is considered broken when, between its opening quote and a closing
  # quote that is followed by a separator or newline, at least one more double
  # quote appears. Every occurrence of such a field's text is replaced by the
  # same text without double quotes.
  #
  # This was never effectively private (a bare `private` does not apply to
  # `def self.` methods), so it stays public for existing callers.
  #
  # @param data [String] CSV text; it is not modified.
  # @param options [Hash, nil] parser options; only :col_sep is read, falling
  #   back to DEFAULT_OPTIONS[:col_sep].
  # @return [String] a repaired copy of +data+.
  def self.remove_misplaced_quotes(data, options)
    sep = (options && options[:col_sep]) || DEFAULT_OPTIONS[:col_sep]
    # Escaped so separators such as "]" or "\\" are safe inside the character class.
    sep_chars = Regexp.escape(sep)
    broken_field = /"([^\n#{sep_chars}]*"[^\n#{sep_chars}]*)"[\n#{sep_chars}]+/

    data.scan(broken_field).inject(data.dup) do |repaired, (field)|
      cleaned = field.delete('"')
      # Block form keeps backslashes in the field text literal.
      repaired.gsub(field) { cleaned }
    end
  end
end
|
data/spec/.DS_Store
ADDED
Binary file
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
describe HeuristicCsv do

  describe "Heuristic behavior" do

    # One entry per example: description, CSV text, and any extra arguments
    # passed to CSV.heuristic_parse after the data.
    scenarios = [
      ["should parse unquoted CSV strings",
       "argument1,argument2,argument3", []],
      ["should parse unquoted CSV strings with different col_sep",
       "argument1;argument2;argument3", [{col_sep: ";"}]],
      ["should parse quoted CSV strings",
       '"argument1","argument2","argument3"', []],
      ["should parse quoted CSV strings with different col_sep",
       '"argument1";"argument2";"argument3"', [{col_sep: ";"}]],
      ["should parse CSV strings with misplaced quotes",
       '"argument1","argum"WRONG"ent2","argument3"', []],
      ["should parse CSV strings with misplaced quotes with different col_sep",
       '"argument1";"argum"WRONG"ent2";"argument3"', [{col_sep: ";"}]]
    ]

    scenarios.each do |title, csv_text, extra_args|
      it title do
        # Every yielded row is expected to contain exactly three fields.
        CSV.heuristic_parse(csv_text, *extra_args) do |fields|
          fields.count.should eq(3)
        end
      end
    end

  end

end
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: heuristic-csv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrea Mostosi
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-06-17 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rspec
|
16
|
+
requirement: &70290990509180 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.6.0
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70290990509180
|
25
|
+
description: Try to overcome the exception CSV::MalformedCSVError and read the data
|
26
|
+
anyway
|
27
|
+
email:
|
28
|
+
- andrea.mostosi@zenkay.net
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- .gitignore
|
34
|
+
- Gemfile
|
35
|
+
- Gemfile.lock
|
36
|
+
- README.md
|
37
|
+
- Rakefile
|
38
|
+
- heuristic-csv.gemspec
|
39
|
+
- lib/.DS_Store
|
40
|
+
- lib/heuristic-csv.rb
|
41
|
+
- lib/heuristic-csv/version.rb
|
42
|
+
- spec/.DS_Store
|
43
|
+
- spec/heuristic-csv_spec.rb
|
44
|
+
- spec/spec_helper.rb
|
45
|
+
homepage: https://github.com/zenkay/heuristic-csv
|
46
|
+
licenses: []
|
47
|
+
post_install_message:
|
48
|
+
rdoc_options: []
|
49
|
+
require_paths:
|
50
|
+
- lib
|
51
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ! '>='
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
59
|
+
requirements:
|
60
|
+
- - ! '>'
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: 1.3.1
|
63
|
+
requirements: []
|
64
|
+
rubyforge_project:
|
65
|
+
rubygems_version: 1.8.10
|
66
|
+
signing_key:
|
67
|
+
specification_version: 3
|
68
|
+
summary: CSV extension
|
69
|
+
test_files:
|
70
|
+
- spec/heuristic-csv_spec.rb
|
71
|
+
- spec/spec_helper.rb
|