nesquikcsv 0.1.6-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +21 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +47 -0
- data/Rakefile +29 -0
- data/ext/csv_parser/extconf.rb +15 -0
- data/ext/csv_parser/parser.c +101 -0
- data/lib/nesquikcsv/version.rb +3 -0
- data/lib/nesquikcsv.rb +133 -0
- data/nesquikcsv.gemspec +28 -0
- data/test/tc_csv_parsing.rb +146 -0
- data/test/tc_interface.rb +142 -0
- metadata +86 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 255038dee22845b62044b5fe0df5631cc6379ce5
|
4
|
+
data.tar.gz: cb66e0db4ef59b7f466c6127d45e0420938556e2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 80879dc0e7b5ade30f51f6cbbb1980f190e11115f4c15ba65a78a294f79638efe6e20eed54259f34f8670559529b79c21af0ea0ca72b6ef734d6395811240283
|
7
|
+
data.tar.gz: fca3005766cf84bde3c23c01613a61074db0f9d5981e7e655602f9f896243e3da41ca26ee95a92dac7578a74b67b8c4c1402117e2f15c653fcf1fd5581dda422
|
data/.gitignore
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
.DS_Store
|
7
|
+
Gemfile.lock
|
8
|
+
InstalledFiles
|
9
|
+
_yardoc
|
10
|
+
coverage
|
11
|
+
doc/
|
12
|
+
lib/bundler/man
|
13
|
+
pkg
|
14
|
+
rdoc
|
15
|
+
spec/reports
|
16
|
+
test/tmp
|
17
|
+
test/version_tmp
|
18
|
+
tmp
|
19
|
+
lib/*.bundle
|
20
|
+
lib/*.jar
|
21
|
+
lib/*.so
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012, 2013 Maarten Oelering
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# NesquikCSV
|
2
|
+
|
3
|
+
Fork of the Fastest-CSV gem to support any encoding, using UTF-8 by default.
|
4
|
+
|
5
|
+
Uses native C code to parse CSV lines in MRI Ruby. On JRuby it falls back to the standard CSV library.
|
6
|
+
|
7
|
+
Supports standard CSV according to RFC4180. Not the so-called "csv" from Excel.
|
8
|
+
|
9
|
+
The interface is a subset of the CSV interface in Ruby 1.9.3. The options parameter is not supported.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
gem 'nesquikcsv'
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
bundle
|
20
|
+
|
21
|
+
Or install it yourself as:
|
22
|
+
|
23
|
+
gem install nesquikcsv
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Parse a single line
|
28
|
+
|
29
|
+
# If no encoding is specified UTF-8 is assumed
|
30
|
+
NesquikCSV.parse_line "one,two,three"
|
31
|
+
=> ["one", "two", "three"]
|
32
|
+
|
33
|
+
# You can also specify an encoding of choice
|
34
|
+
NesquikCSV.parse_line "uno,dós,trés", "ASCII-8BIT"
|
35
|
+
=> ["uno", "d\xC3\xB3s", "tr\xC3\xA9s"]
|
36
|
+
|
37
|
+
Parse a string into an array of arrays
|
38
|
+
|
39
|
+
# Read file contents into string
|
40
|
+
csv_data = "one,two,three\nfour,five"
|
41
|
+
# Defaults to UTF-8 encoding
|
42
|
+
rows = NesquikCSV.parse(csv_data)
|
43
|
+
=> [["one", "two", "three"], ["four", "five"]]
|
44
|
+
# Or explicitly
|
45
|
+
rows = NesquikCSV.parse(csv_data, "UTF-8")
|
46
|
+
=> [["one", "two", "three"], ["four", "five"]]
|
47
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
spec = Gem::Specification.load('nesquikcsv.gemspec')
|
5
|
+
|
6
|
+
if RUBY_PLATFORM =~ /java/
|
7
|
+
# TODO
|
8
|
+
else
|
9
|
+
require 'rake/extensiontask'
|
10
|
+
Rake::ExtensionTask.new('csv_parser', spec)
|
11
|
+
end
|
12
|
+
|
13
|
+
task :console do
|
14
|
+
require 'irb'
|
15
|
+
require 'irb/completion'
|
16
|
+
require 'nesquikcsv'
|
17
|
+
ARGV.clear
|
18
|
+
IRB.start
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'rake/testtask'
|
22
|
+
Rake::TestTask.new do |t|
|
23
|
+
t.libs << "test"
|
24
|
+
t.test_files = FileList['test/tc_*.rb']
|
25
|
+
#test.libs << 'lib' << 'test'
|
26
|
+
#test.pattern = 'test/**/test_*.rb'
|
27
|
+
#test.verbose = true
|
28
|
+
end
|
29
|
+
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/ruby -w

# Build configuration for the csv_parser native extension: generates the
# Makefile used to compile the C sources in this directory.
require 'mkmf'
extension_name = 'csv_parser'
#dir_config(extension_name)

# parser.c includes "rubyio.h" instead of "ruby/io.h" when RUBY_18 is defined.
# The version is matched at the start of the string with literal dots: the
# previous unanchored /1.8/ also matched versions such as "2.1.8" and wrongly
# selected the 1.8 header there.
if RUBY_VERSION =~ /\A1\.8\./
  $CPPFLAGS += " -DRUBY_18"
end

#if CONFIG["arch"] =~ /mswin32|mingw/
#  $CFLAGS += " -march=i686"
#end

create_makefile(extension_name)
|
@@ -0,0 +1,101 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "ruby.h"
|
6
|
+
#ifdef RUBY_18
|
7
|
+
#include "rubyio.h"
|
8
|
+
#else
|
9
|
+
#include "ruby/io.h"
|
10
|
+
#endif
|
11
|
+
|
12
|
+
#define DEF_ARRAY_LEN 32
|
13
|
+
|
14
|
+
#define UNQUOTED 0
|
15
|
+
#define IN_QUOTED 1
|
16
|
+
#define QUOTE_IN_QUOTED 2
|
17
|
+
|
18
|
+
static VALUE mCsvParser;
|
19
|
+
|
20
|
+
/*
 * CsvParser.parse_line(str, encoding) -> array or nil
 *
 * Splits the first CSV record of +str+ into an array of fields. Every field
 * string is created with the encoding looked up from the name in +encoding+.
 *
 *  - Returns nil when +str+ is nil or empty, or when no encoding is found.
 *  - An empty unquoted field becomes nil; an empty quoted field ("") becomes
 *    an empty string.
 *  - Inside quotes, commas, CR and LF are data and "" is an escaped quote.
 *  - Parsing stops at the first CR or LF that is outside quotes.
 *  - A field whose closing quote is missing at end of input is dropped.
 *
 * Raises TypeError when +str+ or +encoding+ cannot be converted to a String
 * (previously such arguments were dereferenced unchecked).
 */
static VALUE parse_line(VALUE self, VALUE str, VALUE encoding)
{
    const char *ptr;
    char *value;            /* scratch buffer for the field being assembled */
    VALUE value_store = 0;  /* owner of the scratch buffer when heap-allocated */
    rb_encoding *enc;
    VALUE array;
    long len;
    long index = 0;         /* number of bytes currently held in value */
    long i;
    int state = UNQUOTED;
    char c;

    if (NIL_P(str))
        return Qnil;

    StringValue(str);
    len = RSTRING_LEN(str);
    if (len == 0)
        return Qnil;

    /* Encoding used for every field string created below */
    enc = rb_enc_find(StringValueCStr(encoding));
    if (enc == NULL)
        return Qnil;

    /* Take the data pointer only after the conversions above, which may run
     * Ruby code. */
    ptr = RSTRING_PTR(str);

    array = rb_ary_new2(DEF_ARRAY_LEN); /* initial capacity; grows on demand */

    /* A field is never longer than the line. ALLOCV_N keeps small buffers on
     * the stack and moves large ones to a GC-managed heap buffer, so a very
     * long record can no longer overflow the C stack. */
    value = ALLOCV_N(char, value_store, len);

    for (i = 0; i < len; i++)
    {
        c = ptr[i];
        switch (c)
        {
            case ',':
                if (state == UNQUOTED) {
                    rb_ary_push(array, (index == 0 ? Qnil : rb_enc_str_new(value, index, enc)));
                    index = 0;
                }
                else if (state == IN_QUOTED) {
                    value[index++] = c;
                }
                else if (state == QUOTE_IN_QUOTED) {
                    /* the previous quote closed the field */
                    rb_ary_push(array, rb_enc_str_new(value, index, enc));
                    index = 0;
                    state = UNQUOTED;
                }
                break;
            case '"':
                if (state == UNQUOTED) {
                    state = IN_QUOTED;
                }
                else if (state == IN_QUOTED) {
                    state = QUOTE_IN_QUOTED;
                }
                else if (state == QUOTE_IN_QUOTED) {
                    value[index++] = c; /* escaped quote */
                    state = IN_QUOTED;
                }
                break;
            case 13: /* \r */
            case 10: /* \n */
                if (state == IN_QUOTED) {
                    value[index++] = c;
                }
                else {
                    i = len; /* only parse first line if multiline */
                }
                break;
            default:
                value[index++] = c;
        }
    }

    /* Flush the last field; an unterminated quoted field (IN_QUOTED) is dropped */
    if (state == UNQUOTED) {
        rb_ary_push(array, (index == 0 ? Qnil : rb_enc_str_new(value, index, enc)));
    }
    else if (state == QUOTE_IN_QUOTED) {
        rb_ary_push(array, rb_enc_str_new(value, index, enc));
    }

    ALLOCV_END(value_store);
    RB_GC_GUARD(str); /* ptr points into str; keep str alive until here */
    return array;
}
|
96
|
+
|
97
|
+
void Init_csv_parser()
|
98
|
+
{
|
99
|
+
mCsvParser = rb_define_module("CsvParser");
|
100
|
+
rb_define_module_function(mCsvParser, "parse_line", parse_line, 2);
|
101
|
+
}
|
data/lib/nesquikcsv.rb
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
if RUBY_PLATFORM =~ /java/
|
2
|
+
require 'csv'
|
3
|
+
else
|
4
|
+
require 'csv_parser'
|
5
|
+
end
|
6
|
+
require 'stringio'
|
7
|
+
|
8
|
+
# Fast CSV parser using native code.
#
# Wraps an IO object and parses it one record at a time. Records are parsed by
# the CsvParser extension, or by the standard CSV library when RUBY_PLATFORM
# matches /java/. Field strings are tagged with the requested encoding
# ("UTF-8" unless stated otherwise).
class NesquikCSV
  include Enumerable

  # Pass each line of the specified +path+ as array to the provided +block+.
  # +encoding+ names the encoding applied to the parsed fields.
  def self.foreach(path, encoding="UTF-8", &block)
    open(path, "rb", encoding) do |reader|
      reader.each(&block)
    end
  end

  # Opens a csv file. Pass a NesquikCSV instance to the provided block and
  # return the block's value (the file is closed afterwards), or return the
  # instance when no block is provided.
  def self.open(path, mode = "rb", encoding = "UTF-8")
    csv = new(File.open(path, mode), encoding)
    return csv unless block_given?

    begin
      yield csv
    ensure
      csv.close
    end
  end

  # Read all lines from the specified +path+ into an array of arrays
  def self.read(path, encoding="UTF-8")
    open(path, "rb", encoding) { |csv| csv.read }
  end

  # Alias for read
  def self.readlines(path, encoding="UTF-8")
    read(path, encoding)
  end

  # Read all lines from the specified String into an array of arrays, or pass
  # each row to +block+ when one is given. The temporary IO is closed in both
  # cases.
  def self.parse(data, encoding="UTF-8", &block)
    csv = new(StringIO.new(data), encoding)
    begin
      block ? csv.each(&block) : csv.read
    ensure
      csv.close
    end
  end

  # Parse a single CSV record into an array of fields. Returns nil for a nil
  # +line+. The caller's string is never modified.
  def self.parse_line(line, encoding="UTF-8")
    return nil if line.nil?

    if RUBY_PLATFORM =~ /java/
      # force_encoding mutates its receiver, so work on a copy
      CSV.parse_line(line.dup.force_encoding(encoding))
    else
      CsvParser.parse_line(line, encoding)
    end
  end

  # Create new NesquikCSV wrapping the specified IO object
  def initialize(io, encoding="UTF-8")
    @io = io
    @encoding = encoding
  end

  # Read from the wrapped IO passing each line as array to the specified
  # block. Returns an enumerator when no block is given.
  def each
    if block_given?
      while row = shift
        yield row
      end
    else
      to_enum # return enumerator
    end
  end

  # Read all remaining lines from the wrapped IO into an array of arrays
  def read
    table = []
    each { |row| table << row }
    table
  end
  alias_method :readlines, :read

  # Rewind the underlying IO object
  def rewind
    @io.rewind
  end

  # Read next line from the wrapped IO and return as array or nil at EOF.
  # +encoding+ defaults to the encoding this instance was created with.
  def shift(encoding=@encoding)
    line = get_line_with_quotes
    line && self.class.parse_line(line, encoding)
  end
  alias_method :gets, :shift
  alias_method :readline, :shift

  # Close the wrapped IO
  def close
    @io.close
  end

  # True when the wrapped IO has been closed
  def closed?
    @io.closed?
  end

  # Read one raw record from the wrapped IO. While the text read so far holds
  # an odd number of double quotes (a quoted field is still open), the next
  # physical line is appended. Returns nil at EOF.
  def get_line_with_quotes
    line = @io.gets
    return nil if line.nil?

    # Track quote parity incrementally instead of recounting the whole record
    open_quote = line.count('"').odd?
    while open_quote
      next_line = @io.gets
      break if next_line.nil?
      line << next_line
      open_quote ^= next_line.count('"').odd?
    end
    line
  end

end
|
data/nesquikcsv.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
require File.expand_path('../lib/nesquikcsv/version', __FILE__)

# Gem specification for nesquikcsv. When built on a Java platform the gem is
# marked as platform "java" and ships without the C extension; otherwise the
# csv_parser extension is compiled at install time.
Gem::Specification.new do |gem|
  gem.authors       = ["Juan Martty"]
  gem.email         = ["null.terminated.string@gmail.com"]
  gem.description   = %q{Fastest-CSV fork with encoding support}
  gem.summary       = %q{Fastest-CSV fork with encoding support}
  gem.homepage      = "https://github.com/jmartty/nesquikcsv"

  # `git ls-files` prints one path per line, so split on newlines. The
  # previous separator `$\` is nil unless the user changed it, which makes
  # String#split break on any whitespace and mangles paths containing spaces.
  gem.files         = `git ls-files`.split("\n")
  #gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "nesquikcsv"
  gem.require_paths = ["lib"]
  gem.version       = NesquikCSV::VERSION

  if RUBY_PLATFORM =~ /java/
    gem.platform = "java"
  else
    gem.extensions = ['ext/csv_parser/extconf.rb']
  end

  gem.add_development_dependency "test-unit"
  gem.add_development_dependency "rake-compiler"

  gem.license = 'MIT'
end
|
@@ -0,0 +1,146 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
#
|
3
|
+
# Tests copied from faster_csv by James Edward Gray II
|
4
|
+
#
|
5
|
+
|
6
|
+
# TODO
# These tests don't make sense for JRuby since there's no Java extension, so
# they are only defined on other interpreters.
unless RUBY_DESCRIPTION =~ /jruby/

  require 'test/unit'
  require 'nesquikcsv'

  #
  # Following tests are my interpretation of the
  # {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
  # document in one place (intentionally) and that is to make the default row
  # separator <tt>$/</tt>.
  #
  class TestCSVParsing < Test::Unit::TestCase

    def test_mastering_regex_example
      line = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
      fields = [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                 "It's \"10 Grand\", baby", "10K" ]
      assert_equal(fields, CsvParser.parse_line(line, "UTF-8"))
    end

    # Pulled from: http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
    def test_std_lib_csv
      assert_all_parse(
        [ ["\t", ["\t"]],
          ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
          ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
          ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
          ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
          ["\"\"", [""]],
          ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
          ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
          ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
          ["foo,\"\",baz", ["foo", "", "baz"]],
          ["\",\"", [","]],
          ["foo", ["foo"]],
          [",,", [nil, nil, nil]],
          [",", [nil, nil]],
          ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
          ["foo,,baz", ["foo", nil, "baz"]],
          ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
          ["\",\",\",\"", [",", ","]],
          ["foo,bar,", ["foo", "bar", nil]],
          [",foo,bar", [nil, "foo", "bar"]],
          ["foo,bar", ["foo", "bar"]],
          [";", [";"]],
          ["\t,\t", ["\t", "\t"]],
          ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
          ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
          ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
          [";,;", [";", ";"]] ]
      )

      assert_all_parse(
        [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
          ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
          ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
          ["\"\"", [""]],
          ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
          ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
          ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
          ["foo,\"\",baz", ["foo", "", "baz"]],
          ["foo", ["foo"]],
          [",,", [nil, nil, nil]],
          [",", [nil, nil]],
          ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
          ["foo,,baz", ["foo", nil, "baz"]],
          ["foo,bar", ["foo", "bar"]],
          ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
          ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ]
      )
    end

    # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
    def test_aras_edge_cases
      assert_all_parse(
        [ [%Q{a,b}, ["a", "b"]],
          [%Q{a,"""b"""}, ["a", "\"b\""]],
          [%Q{a,"""b"}, ["a", "\"b"]],
          [%Q{a,"b"""}, ["a", "b\""]],
          [%Q{a,"\nb"""}, ["a", "\nb\""]],
          [%Q{a,"""\nb"}, ["a", "\"\nb"]],
          [%Q{a,"""\nb\n"""}, ["a", "\"\nb\n\""]],
          [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
          [%Q{a,,,}, ["a", nil, nil, nil]],
          [%Q{,}, [nil, nil]],
          [%Q{"",""}, ["", ""]],
          [%Q{""""}, ["\""]],
          [%Q{"""",""}, ["\"",""]],
          [%Q{,""}, [nil,""]],
          [%Q{,"\r"}, [nil,"\r"]],
          [%Q{"\r\n,"}, ["\r\n,"]],
          [%Q{"\r\n,",}, ["\r\n,", nil]] ]
      )
    end

    def test_james_edge_cases
      # A read at eof? should return nil.
      assert_equal(nil, CsvParser.parse_line("", "UTF-8"))
      #
      # With CSV it's impossible to tell an empty line from a line containing a
      # single +nil+ field. The standard CSV library returns <tt>[nil]</tt>
      # in these cases, but <tt>Array.new</tt> makes more sense to me.
      #
      #assert_equal(Array.new, NesquikCSV.parse_line("\n1,2,3\n"))
      assert_equal([nil], CsvParser.parse_line("\n1,2,3\n", "UTF-8"))
    end

    def test_rob_edge_cases
      assert_all_parse(
        [ [%Q{"a\nb"}, ["a\nb"]],
          [%Q{"\n\n\n"}, ["\n\n\n"]],
          [%Q{a,"b\n\nc"}, ['a', "b\n\nc"]],
          [%Q{,"\r\n"}, [nil,"\r\n"]],
          [%Q{,"\r\n."}, [nil,"\r\n."]],
          [%Q{"a\na","one newline"}, ["a\na", 'one newline']],
          [%Q{"a\n\na","two newlines"}, ["a\n\na", 'two newlines']],
          [%Q{"a\r\na","one CRLF"}, ["a\r\na", 'one CRLF']],
          [%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
          [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
        ]
      )
    end

    def test_encoding
      parsed = CsvParser.parse_line("ñ,ó,¸", "UTF-8")
      assert_equal(["ñ","ó","¸"], parsed)
    end

    def test_quoted_line_break
      parsed = CsvParser.parse_line("foo,\"bar,baz\nbeam\",bee", "UTF-8")
      assert_equal(["foo","bar,baz\nbeam","bee"], parsed)
    end

    def test_quoted_line_break_at_end
      parsed = CsvParser.parse_line("foo,\"bar,baz\n\",bee", "UTF-8")
      assert_equal(["foo","bar,baz\n","bee"], parsed)
    end

    private

    # Asserts, in order, that each [input, expected_fields] pair parses to the
    # expected fields when read as UTF-8.
    def assert_all_parse(cases)
      cases.each do |input, expected|
        assert_equal(expected, CsvParser.parse_line(input, "UTF-8"))
      end
    end

  end

end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
#
|
2
|
+
# Tests copied from faster_csv by James Edward Gray II
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'nesquikcsv'
|
7
|
+
|
8
|
+
# Exercises the public NesquikCSV interface against a small temporary CSV
# file that is written before and removed after every test.
class TestNesquikCSVInterface < Test::Unit::TestCase

  # Writes two CRLF-terminated records and records the rows they should parse to.
  def setup
    @path = File.join(File.dirname(__FILE__), "temp_test_data.csv")

    File.open(@path, "w") do |file|
      file << "1,2,3\r\n"
      file << "4,5\r\n"
    end

    @expected = [%w{1 2 3}, %w{4 5}]
  end

  # Removes the fixture; tolerant of a test that already deleted it.
  def teardown
    File.unlink(@path) if File.exist?(@path)
  end

  ### Test Read Interface ###

  def test_foreach
    NesquikCSV.foreach(@path) do |row|
      assert_equal(@expected.shift, row)
    end
    # every expected row must have been yielded
    assert_equal([], @expected)
  end

  def test_open_and_close
    csv = NesquikCSV.open(@path, "r+")
    assert_not_nil(csv)
    assert_instance_of(NesquikCSV, csv)
    assert_equal(false, csv.closed?)
    csv.close
    assert(csv.closed?)

    # A block parameter named csv would shadow the local above, so the
    # instance yielded to the block is captured explicitly; otherwise the
    # closed? check below would only re-test the object closed earlier.
    yielded = nil
    ret = NesquikCSV.open(@path) do |block_csv|
      assert_instance_of(NesquikCSV, block_csv)
      assert_equal(false, block_csv.closed?)
      yielded = block_csv
      "Return value."
    end
    assert_not_nil(yielded)
    assert(yielded.closed?)
    assert_equal("Return value.", ret)
  end

  def test_parse
    data = File.read(@path)
    assert_equal( @expected,
                  NesquikCSV.parse(data) )

    NesquikCSV.parse(data) do |row|
      assert_equal(@expected.shift, row)
    end
    # every expected row must have been yielded
    assert_equal([], @expected)
  end

  #def test_parse_line
  #  row = FasterCSV.parse_line("1;2;3", :col_sep => ";")
  #  assert_not_nil(row)
  #  assert_instance_of(Array, row)
  #  assert_equal(%w{1 2 3}, row)
  #
  #  # shortcut interface
  #  row = "1;2;3".parse_csv(:col_sep => ";")
  #  assert_not_nil(row)
  #  assert_instance_of(Array, row)
  #  assert_equal(%w{1 2 3}, row)
  #end

  def test_parse_line_with_empty_lines
    assert_nil(NesquikCSV.parse_line("", "UTF-8")) # to signal eof
    #assert_equal(Array.new, NesquikCSV.parse_line("\n1,2,3"))
    # Test removed because it didn't respect CSV's interface: [] != [nil]
    #assert_equal([nil], NesquikCSV.parse_line("\n1,2,3", "UTF-8"))
  end

  def test_read_and_readlines
    assert_equal( @expected,
                  NesquikCSV.read(@path) )
    assert_equal( @expected,
                  NesquikCSV.readlines(@path))

    data = NesquikCSV.open(@path) do |csv|
      csv.read
    end
    assert_equal(@expected, data)
    data = NesquikCSV.open(@path) do |csv|
      csv.readlines
    end
    assert_equal(@expected, data)
  end

  #def test_table
  #  table = NesquikCSV.table(@path)
  #  assert_instance_of(NesquikCSV::Table, table)
  #  assert_equal([[:"1", :"2", :"3"], [4, 5, nil]], table.to_a)
  #end

  def test_shift # aliased as gets() and readline()
    NesquikCSV.open(@path, "r+") do |csv|
      assert_equal(@expected.shift, csv.shift)
      assert_equal(@expected.shift, csv.shift)
      assert_nil(csv.shift)
    end
  end

  def test_long_line # ruby's regex parser may have problems with long rows
    File.unlink(@path)

    long_field_length = 2800
    File.open(@path, "w") do |file|
      file << "1,2,#{'3' * long_field_length}\r\n"
    end
    @expected = [%w{1 2} + ['3' * long_field_length]]
    # reuse the shift assertions against the single long record
    test_shift
  end

  def test_enumerable
    NesquikCSV.open(@path) do |csv|
      assert(csv.include?(["1", "2", "3"]))
      csv.rewind
      assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
    end
  end

  def test_multiline
    assert_equal([["foo"],["bar"]], NesquikCSV.parse("foo\nbar"))
  end

  def test_quoted_line_break
    assert_equal([["foo","bar,baz\nbeam","bee"],["one","two"]], NesquikCSV.parse("foo,\"bar,baz\nbeam\",bee\none,two", "UTF-8"))
  end

  def test_quoted_line_break_at_end
    assert_equal([[nil,"foo,\nbar,baz","beam"],["one","two"]], NesquikCSV.parse(",\"foo,\nbar,baz\",beam\none,two", "UTF-8"))
  end

end
|
metadata
ADDED
@@ -0,0 +1,86 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: nesquikcsv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.6
|
5
|
+
platform: java
|
6
|
+
authors:
|
7
|
+
- Juan Martty
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-01-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: test-unit
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - '>='
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '0'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake-compiler
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
description: Fastest-CSV fork with encoding support
|
42
|
+
email:
|
43
|
+
- null.terminated.string@gmail.com
|
44
|
+
executables: []
|
45
|
+
extensions: []
|
46
|
+
extra_rdoc_files: []
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- LICENSE
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- ext/csv_parser/extconf.rb
|
54
|
+
- ext/csv_parser/parser.c
|
55
|
+
- lib/nesquikcsv.rb
|
56
|
+
- lib/nesquikcsv/version.rb
|
57
|
+
- nesquikcsv.gemspec
|
58
|
+
- test/tc_csv_parsing.rb
|
59
|
+
- test/tc_interface.rb
|
60
|
+
homepage: https://github.com/jmartty/nesquikcsv
|
61
|
+
licenses:
|
62
|
+
- MIT
|
63
|
+
metadata: {}
|
64
|
+
post_install_message:
|
65
|
+
rdoc_options: []
|
66
|
+
require_paths:
|
67
|
+
- lib
|
68
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
requirements: []
|
79
|
+
rubyforge_project:
|
80
|
+
rubygems_version: 2.4.5
|
81
|
+
signing_key:
|
82
|
+
specification_version: 4
|
83
|
+
summary: Fastest-CSV fork with encoding support
|
84
|
+
test_files:
|
85
|
+
- test/tc_csv_parsing.rb
|
86
|
+
- test/tc_interface.rb
|