heuristic-csv 0.0.2.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-HTTP endpoint gives no protection against
# tampering with downloaded packages.
source 'https://rubygems.org'

# Specify your gem's dependencies in heuristic-csv.gemspec
gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ heuristic-csv (0.0.2.alpha)
5
+
6
+ GEM
7
+ remote: http://rubygems.org/
8
+ specs:
9
+ diff-lcs (1.1.3)
10
+ rspec (2.6.0)
11
+ rspec-core (~> 2.6.0)
12
+ rspec-expectations (~> 2.6.0)
13
+ rspec-mocks (~> 2.6.0)
14
+ rspec-core (2.6.4)
15
+ rspec-expectations (2.6.0)
16
+ diff-lcs (~> 1.1.2)
17
+ rspec-mocks (2.6.0)
18
+
19
+ PLATFORMS
20
+ ruby
21
+
22
+ DEPENDENCIES
23
+ heuristic-csv!
24
+ rspec (~> 2.6.0)
data/README.md ADDED
@@ -0,0 +1,5 @@
1
+ HeuristicCsv
2
+ =============
3
+
4
+ The standard CSV library raises an exception when you try to read malformed CSV data.
5
+ Sometimes you need to read the data anyway. This gem adds methods to the standard CSV library that try to recover from the CSV::MalformedCSVError exception by using regular expressions to repair the malformed data and then parsing it again.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require 'rspec/core/rake_task'
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task default: :spec
@@ -0,0 +1,19 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/heuristic-csv/version', __FILE__)
3
+
4
# Gem specification for heuristic-csv.
#
# The packaged file lists are taken from `git ls-files`, so the gem must be
# built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.name          = "heuristic-csv"
  gem.version       = HeuristicCsv::VERSION
  gem.authors       = ["Andrea Mostosi"]
  gem.email         = ["andrea.mostosi@zenkay.net"]
  gem.summary       = %q{CSV extension}
  gem.description   = %q{Try to overcome the exception CSV::MalformedCSVError and read the data anyway}
  gem.homepage      = "https://github.com/zenkay/heuristic-csv"

  # macOS Finder metadata files that are tracked by git must not be shipped
  # inside the gem.
  without_os_junk = lambda do |paths|
    paths.reject { |path| File.basename(path) == ".DS_Store" }
  end

  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.files         = without_os_junk.call(`git ls-files`.split("\n"))
  gem.test_files    = without_os_junk.call(`git ls-files -- {test,spec,features}/*`.split("\n"))
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rspec", "~> 2.6.0"
end
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,30 @@
1
+ require "heuristic-csv/version"
2
+
3
# Extends the standard CSV class with a parser that tries to repair malformed
# input instead of giving up on the first CSV::MalformedCSVError.
class CSV
  # Parses +data+ like CSV.parse; if the input is malformed, it is repaired
  # with ::remove_misplaced_quotes and parsed a second time.
  #
  # All rows are parsed before any of them is yielded, so a failure halfway
  # through the input never hands the same row to the block twice.
  #
  # data    - String with the CSV text. The historical default ($stdout) is
  #           kept only for signature compatibility; callers are expected to
  #           pass the text explicitly.
  # options - Hash of CSV options with Symbol keys (for example
  #           {col_sep: ";"}); nil is treated as an empty Hash.
  # block   - optional; called once per parsed row.
  #
  # Returns the parsed rows (whatever CSV.parse returns without a block).
  # Raises CSV::MalformedCSVError if the data is still malformed after the
  # repair attempt, or if +data+ is not a String and cannot be repaired.
  def self.heuristic_parse(data = $stdout, options = Hash.new, &block)
    options ||= {}

    rows =
      begin
        # CSV.parse takes its options as keywords, hence the double splat.
        parse(data, **options)
      rescue MalformedCSVError
        # Only in-memory text can be rewritten; anything else is re-raised.
        raise unless data.is_a?(String)

        parse(remove_misplaced_quotes(data, options), **options)
      end

    rows.each(&block) if block_given?
    rows
  end

  # Strategy 1: find quoted fields that contain stray quote characters and
  # strip every quote from inside them.
  #
  # A field is considered broken when it starts with a quote, contains at
  # least one further quote before its closing quote, and is followed by a
  # column separator or a newline. A broken field at the very end of the
  # input (no trailing separator or newline) is therefore not detected.
  #
  # This method stays publicly callable: the bare `private` that used to
  # precede it never applied to `def self.` methods.
  #
  # data    - String with the CSV text; it is not modified.
  # options - Hash whose :col_sep entry, when present, overrides the default
  #           column separator; may be nil.
  #
  # Returns a new String with the repaired text.
  def self.remove_misplaced_quotes(data, options)
    sep = (options && options[:col_sep]) || DEFAULT_OPTIONS[:col_sep]

    # Escape the separator so characters such as "]" or "\" stay literal
    # inside the character classes below.
    boundary = "\\n#{Regexp.escape(sep)}"
    broken_field = /"([^#{boundary}]*"[^#{boundary}]*)"[#{boundary}]+/

    data.scan(broken_field).flatten.uniq.inject(data.dup) do |fixed, field|
      # Block form: the replacement text is inserted verbatim, without
      # backslash-sequence interpretation.
      fixed.gsub(field) { field.delete('"') }
    end
  end
end
@@ -0,0 +1,3 @@
1
# Namespace holding the release metadata of the heuristic-csv gem.
module HeuristicCsv
  # Version string of this release; frozen so the constant cannot be
  # modified in place at runtime.
  VERSION = "0.0.2.alpha".freeze
end
data/spec/.DS_Store ADDED
Binary file
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ require 'spec_helper'
3
# Behavioural specs for CSV.heuristic_parse.
#
# Every example collects the rows handed to the block and compares them with
# the exact expected result, so an example fails when no row is yielded at
# all instead of passing vacuously.
describe HeuristicCsv do

  # Runs CSV.heuristic_parse on +data+ and returns the rows it yielded.
  def parsed_rows(data, options = {})
    rows = []
    CSV.heuristic_parse(data, options) { |row| rows << row }
    rows
  end

  describe "Heuristic behavior" do

    it "should parse unquoted CSV strings" do
      rows = parsed_rows("argument1,argument2,argument3")
      rows.should eq([["argument1", "argument2", "argument3"]])
    end

    it "should parse unquoted CSV strings with different col_sep" do
      rows = parsed_rows("argument1;argument2;argument3", {col_sep: ";"})
      rows.should eq([["argument1", "argument2", "argument3"]])
    end

    it "should parse quoted CSV strings" do
      rows = parsed_rows('"argument1","argument2","argument3"')
      rows.should eq([["argument1", "argument2", "argument3"]])
    end

    it "should parse quoted CSV strings with different col_sep" do
      rows = parsed_rows('"argument1";"argument2";"argument3"', {col_sep: ";"})
      rows.should eq([["argument1", "argument2", "argument3"]])
    end

    it "should parse CSV strings with misplaced quotes" do
      rows = parsed_rows('"argument1","argum"WRONG"ent2","argument3"')
      rows.should eq([["argument1", "argumWRONGent2", "argument3"]])
    end

    it "should parse CSV strings with misplaced quotes with different col_sep" do
      rows = parsed_rows('"argument1";"argum"WRONG"ent2";"argument3"', {col_sep: ";"})
      rows.should eq([["argument1", "argumWRONGent2", "argument3"]])
    end

  end

end
@@ -0,0 +1,2 @@
1
+ require 'csv'
2
+ require 'heuristic-csv'
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: heuristic-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2.alpha
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - Andrea Mostosi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-06-17 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rspec
16
+ requirement: &70290990509180 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 2.6.0
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *70290990509180
25
+ description: Try to overcome the exception CSV::MalformedCSVError and read the data
26
+ anyway
27
+ email:
28
+ - andrea.mostosi@zenkay.net
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - .gitignore
34
+ - Gemfile
35
+ - Gemfile.lock
36
+ - README.md
37
+ - Rakefile
38
+ - heuristic-csv.gemspec
39
+ - lib/.DS_Store
40
+ - lib/heuristic-csv.rb
41
+ - lib/heuristic-csv/version.rb
42
+ - spec/.DS_Store
43
+ - spec/heuristic-csv_spec.rb
44
+ - spec/spec_helper.rb
45
+ homepage: https://github.com/zenkay/heuristic-csv
46
+ licenses: []
47
+ post_install_message:
48
+ rdoc_options: []
49
+ require_paths:
50
+ - lib
51
+ required_ruby_version: !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ! '>='
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ required_rubygems_version: !ruby/object:Gem::Requirement
58
+ none: false
59
+ requirements:
60
+ - - ! '>'
61
+ - !ruby/object:Gem::Version
62
+ version: 1.3.1
63
+ requirements: []
64
+ rubyforge_project:
65
+ rubygems_version: 1.8.10
66
+ signing_key:
67
+ specification_version: 3
68
+ summary: CSV extension
69
+ test_files:
70
+ - spec/heuristic-csv_spec.rb
71
+ - spec/spec_helper.rb