fastest-csv 0.0.1 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/README.md +2 -2
- data/Rakefile +20 -0
- data/ext/csv_parser/CsvParser.java +89 -0
- data/ext/csv_parser/CsvParserService.java +115 -0
- data/ext/csv_parser/extconf.rb +7 -1
- data/ext/csv_parser/parser.c +21 -20
- data/fastest-csv.gemspec +10 -3
- data/lib/fastest-csv/version.rb +1 -1
- data/lib/fastest_csv.rb +42 -8
- data/test/tc_csv_parsing.rb +11 -7
- data/test/tc_interface.rb +9 -0
- metadata +24 -6
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# FastestCSV
|
2
2
|
|
3
|
-
Fastest CSV class for MRI Ruby. Faster than faster_csv and fasterer-csv.
|
3
|
+
Fastest CSV class for MRI Ruby and JRuby. Faster than faster_csv and fasterer-csv.
|
4
4
|
|
5
|
-
Uses native C code to parse CSV lines
|
5
|
+
Uses native C code to parse CSV lines in MRI Ruby and Java in JRuby.
|
6
6
|
|
7
7
|
Supports standard CSV according to RFC4180. Not the so-called "csv" from Excel.
|
8
8
|
|
data/Rakefile
CHANGED
@@ -1,2 +1,22 @@
|
|
1
1
|
#!/usr/bin/env rake
|
2
2
|
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
spec = Gem::Specification.load('fastest-csv.gemspec')
|
5
|
+
|
6
|
+
if RUBY_PLATFORM =~ /java/
|
7
|
+
require 'rake/javaextensiontask'
|
8
|
+
Rake::JavaExtensionTask.new('csv_parser', spec)
|
9
|
+
else
|
10
|
+
require 'rake/extensiontask'
|
11
|
+
Rake::ExtensionTask.new('csv_parser', spec)
|
12
|
+
end
|
13
|
+
|
14
|
+
require 'rake/testtask'
|
15
|
+
Rake::TestTask.new do |t|
|
16
|
+
t.libs << "test"
|
17
|
+
t.test_files = FileList['test/tc_*.rb']
|
18
|
+
#test.libs << 'lib' << 'test'
|
19
|
+
#test.pattern = 'test/**/test_*.rb'
|
20
|
+
#test.verbose = true
|
21
|
+
end
|
22
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
//
|
4
|
+
|
5
|
+
package org.brightcode;
|
6
|
+
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
/**
 * Parser for a single CSV record held in a {@link String}.
 *
 * <p>Fields are separated by commas. A field may be wrapped in double quotes,
 * in which case it may contain commas, carriage returns and line feeds, and a
 * doubled quote ({@code ""}) stands for one literal quote character.
 */
public class CsvParser {

  /** Initial capacity of the list that collects the fields of one line. */
  private static final int DEF_ARRAY_LEN = 32;

  /** Parser state: not inside a quoted section. */
  private static final int UNQUOTED = 0;

  /** Parser state: inside a quoted section. */
  private static final int IN_QUOTED = 1;

  /**
   * Parser state: a quote was seen inside a quoted section. It is either the
   * closing quote or the first half of an escaped quote; the next character
   * decides.
   */
  private static final int QUOTE_IN_QUOTED = 2;

  /**
   * Splits one CSV line into its fields.
   *
   * <p>An empty unquoted field is reported as {@code null}, whereas an empty
   * quoted field ({@code ""}) is reported as an empty string. Parsing stops at
   * the first carriage return or line feed that is not inside a quoted
   * section, so only the first record of a multi-line string is returned. If
   * the input ends while a quoted section is still open, that last field is
   * not added to the result.
   *
   * @param line the text to parse; must not be {@code null}
   * @return the fields in order of appearance, or {@code null} when
   *     {@code line} is empty
   */
  public static List<String> parseLine(String line) {
    int length = line.length();
    if (length == 0) {
      return null;
    }

    int state = UNQUOTED;
    StringBuilder value = new StringBuilder(length); // field value, no longer than line
    List<String> array = new ArrayList<String>(DEF_ARRAY_LEN);

    for (int i = 0; i < length; i++) {
      char c = line.charAt(i);
      switch (c) {
        case ',':
          if (state == UNQUOTED) {
            // End of an unquoted field; empty means "no value".
            array.add(value.length() == 0 ? null : value.toString());
            value.setLength(0);
          } else if (state == IN_QUOTED) {
            value.append(c); // comma is plain data inside quotes
          } else if (state == QUOTE_IN_QUOTED) {
            // The previous quote closed the field; keep it even when empty.
            array.add(value.toString());
            value.setLength(0);
            state = UNQUOTED;
          }
          break;
        case '"':
          if (state == UNQUOTED) {
            state = IN_QUOTED;
          } else if (state == IN_QUOTED) {
            state = QUOTE_IN_QUOTED;
          } else if (state == QUOTE_IN_QUOTED) {
            value.append(c); // escaped quote
            state = IN_QUOTED;
          }
          break;
        case '\r':
        case '\n':
          if (state == IN_QUOTED) {
            value.append(c);
          } else {
            i = length; // only parse first line if multiline
          }
          break;
        default:
          value.append(c);
          break;
      }
    }

    // Flush the field that was still being collected when the input ended.
    if (state == UNQUOTED) {
      array.add(value.length() == 0 ? null : value.toString());
    } else if (state == QUOTE_IN_QUOTED) {
      array.add(value.toString());
    }
    return array;
  }
}
|
@@ -0,0 +1,115 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
//
|
4
|
+
|
5
|
+
package org.brightcode;
|
6
|
+
|
7
|
+
import java.io.IOException;
|
8
|
+
|
9
|
+
import org.jruby.Ruby;
|
10
|
+
import org.jruby.RubyArray;
|
11
|
+
import org.jruby.RubyModule;
|
12
|
+
import org.jruby.RubyString;
|
13
|
+
import org.jruby.runtime.Block;
|
14
|
+
import org.jruby.runtime.CallbackFactory;
|
15
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
16
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
17
|
+
|
18
|
+
public class CsvParserService implements BasicLibraryService {
|
19
|
+
|
20
|
+
private Ruby runtime;
|
21
|
+
|
22
|
+
private static int DEF_ARRAY_LEN = 32;
|
23
|
+
|
24
|
+
private static int UNQUOTED = 0;
|
25
|
+
private static int IN_QUOTED = 1;
|
26
|
+
private static int QUOTE_IN_QUOTED = 2;
|
27
|
+
|
28
|
+
// Initial setup function. Takes a reference to the current JRuby runtime and
|
29
|
+
// sets up our modules.
|
30
|
+
public boolean basicLoad(Ruby runtime) throws IOException {
|
31
|
+
this.runtime = runtime;
|
32
|
+
|
33
|
+
RubyModule mCsvParser = runtime.defineModule("CsvParser");
|
34
|
+
// TODO: CallbackFactory#getSingletonMethod is deprecated
|
35
|
+
CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
|
36
|
+
mCsvParser.defineModuleFunction("parse_line",
|
37
|
+
callbackFactory.getSingletonMethod("parseLine", RubyString.class));
|
38
|
+
return true;
|
39
|
+
}
|
40
|
+
|
41
|
+
public static IRubyObject parseLine(IRubyObject recv, RubyString line, Block unusedBlock) {
|
42
|
+
Ruby runtime = recv.getRuntime();
|
43
|
+
|
44
|
+
CharSequence seq = line.getValue();
|
45
|
+
int length = seq.length();
|
46
|
+
if (length == 0)
|
47
|
+
return runtime.getNil();
|
48
|
+
|
49
|
+
int state = UNQUOTED;
|
50
|
+
StringBuilder value = new StringBuilder(length); // field value, no longer than line
|
51
|
+
RubyArray array = RubyArray.newArray(runtime, DEF_ARRAY_LEN);
|
52
|
+
|
53
|
+
for (int i = 0; i < length; i++) {
|
54
|
+
char c = seq.charAt(i);
|
55
|
+
switch (c) {
|
56
|
+
case ',':
|
57
|
+
if (state == UNQUOTED) {
|
58
|
+
if (value.length() == 0) {
|
59
|
+
array.append(runtime.getNil());
|
60
|
+
}
|
61
|
+
else {
|
62
|
+
array.append(RubyString.newString(runtime, value));
|
63
|
+
value.setLength(0);
|
64
|
+
}
|
65
|
+
}
|
66
|
+
else if (state == IN_QUOTED) {
|
67
|
+
value.append(c);
|
68
|
+
}
|
69
|
+
else if (state == 2) {
|
70
|
+
array.append(RubyString.newString(runtime, value));
|
71
|
+
value.setLength(0);
|
72
|
+
state = UNQUOTED;
|
73
|
+
}
|
74
|
+
break;
|
75
|
+
case '"':
|
76
|
+
if (state == UNQUOTED) {
|
77
|
+
state = IN_QUOTED;
|
78
|
+
}
|
79
|
+
else if (state == IN_QUOTED) {
|
80
|
+
state = QUOTE_IN_QUOTED;
|
81
|
+
}
|
82
|
+
else if (state == QUOTE_IN_QUOTED) {
|
83
|
+
value.append(c); // escaped quote
|
84
|
+
state = IN_QUOTED;
|
85
|
+
}
|
86
|
+
break;
|
87
|
+
case '\r':
|
88
|
+
case '\n':
|
89
|
+
if (state == IN_QUOTED) {
|
90
|
+
value.append(c);
|
91
|
+
}
|
92
|
+
else {
|
93
|
+
i = length; // only parse first line if multiline
|
94
|
+
}
|
95
|
+
break;
|
96
|
+
default:
|
97
|
+
value.append(c);
|
98
|
+
break;
|
99
|
+
}
|
100
|
+
}
|
101
|
+
if (state == UNQUOTED) {
|
102
|
+
if (value.length() == 0) {
|
103
|
+
array.append(runtime.getNil());
|
104
|
+
}
|
105
|
+
else {
|
106
|
+
array.append(RubyString.newString(runtime, value));
|
107
|
+
value.setLength(0);
|
108
|
+
}
|
109
|
+
}
|
110
|
+
else if (state == QUOTE_IN_QUOTED) {
|
111
|
+
array.append(RubyString.newString(runtime, value));
|
112
|
+
}
|
113
|
+
return array;
|
114
|
+
}
|
115
|
+
}
|
data/ext/csv_parser/extconf.rb
CHANGED
@@ -1,9 +1,15 @@
|
|
1
1
|
#!/usr/bin/ruby -w
|
2
2
|
|
3
3
|
require 'mkmf'
|
4
|
+
extension_name = 'csv_parser'
|
5
|
+
#dir_config(extension_name)
|
4
6
|
|
5
7
|
if RUBY_VERSION =~ /1.8/ then
|
6
8
|
$CPPFLAGS += " -DRUBY_18"
|
7
9
|
end
|
8
10
|
|
9
|
-
|
11
|
+
#if CONFIG["arch"] =~ /mswin32|mingw/
|
12
|
+
# $CFLAGS += " -march=i686"
|
13
|
+
#end
|
14
|
+
|
15
|
+
create_makefile(extension_name)
|
data/ext/csv_parser/parser.c
CHANGED
@@ -9,10 +9,13 @@
|
|
9
9
|
#include "ruby/io.h"
|
10
10
|
#endif
|
11
11
|
|
12
|
-
/* default allocated size is 16 */
|
13
12
|
#define DEF_ARRAY_LEN 32
|
14
13
|
|
15
|
-
|
14
|
+
#define UNQUOTED 0
|
15
|
+
#define IN_QUOTED 1
|
16
|
+
#define QUOTE_IN_QUOTED 2
|
17
|
+
|
18
|
+
static VALUE mCsvParser;
|
16
19
|
|
17
20
|
static VALUE parse_line(VALUE self, VALUE str)
|
18
21
|
{
|
@@ -25,7 +28,7 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
25
28
|
if (len == 0)
|
26
29
|
return Qnil;
|
27
30
|
|
28
|
-
VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
|
31
|
+
VALUE array = rb_ary_new2(DEF_ARRAY_LEN); /* default allocated size is 16 */
|
29
32
|
char value[len]; /* field value, no longer than line */
|
30
33
|
int state = 0;
|
31
34
|
int index = 0;
|
@@ -37,51 +40,49 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
37
40
|
switch (c)
|
38
41
|
{
|
39
42
|
case ',':
|
40
|
-
if (state ==
|
43
|
+
if (state == UNQUOTED) {
|
41
44
|
rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
|
42
45
|
index = 0;
|
43
46
|
}
|
44
|
-
else if (state ==
|
47
|
+
else if (state == IN_QUOTED) {
|
45
48
|
value[index++] = c;
|
46
49
|
}
|
47
|
-
else if (state ==
|
50
|
+
else if (state == QUOTE_IN_QUOTED) {
|
48
51
|
rb_ary_push(array, rb_str_new(value, index));
|
49
52
|
index = 0;
|
50
|
-
state =
|
53
|
+
state = UNQUOTED;
|
51
54
|
}
|
52
55
|
break;
|
53
56
|
case '"':
|
54
|
-
if (state ==
|
55
|
-
state =
|
57
|
+
if (state == UNQUOTED) {
|
58
|
+
state = IN_QUOTED;
|
56
59
|
}
|
57
60
|
else if (state == 1) {
|
58
|
-
state =
|
61
|
+
state = QUOTE_IN_QUOTED;
|
59
62
|
}
|
60
|
-
else if (state ==
|
63
|
+
else if (state == QUOTE_IN_QUOTED) {
|
61
64
|
value[index++] = c; /* escaped quote */
|
62
|
-
state =
|
65
|
+
state = IN_QUOTED;
|
63
66
|
}
|
64
67
|
break;
|
65
68
|
case 13: /* \r */
|
66
69
|
case 10: /* \n */
|
67
|
-
if (state ==
|
70
|
+
if (state == IN_QUOTED) {
|
68
71
|
value[index++] = c;
|
69
72
|
}
|
70
73
|
else {
|
71
|
-
/* only
|
72
|
-
i = len;
|
74
|
+
i = len; /* only parse first line if multiline */
|
73
75
|
}
|
74
|
-
/* else eat it ??? or return so far */
|
75
76
|
break;
|
76
77
|
default:
|
77
78
|
value[index++] = c;
|
78
79
|
}
|
79
80
|
}
|
80
81
|
|
81
|
-
if (state ==
|
82
|
+
if (state == UNQUOTED) {
|
82
83
|
rb_ary_push(array, (index == 0 ? Qnil: rb_str_new(value, index)));
|
83
84
|
}
|
84
|
-
else if (state ==
|
85
|
+
else if (state == QUOTE_IN_QUOTED) {
|
85
86
|
rb_ary_push(array, rb_str_new(value, index));
|
86
87
|
}
|
87
88
|
return array;
|
@@ -89,6 +90,6 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
89
90
|
|
90
91
|
void Init_csv_parser()
|
91
92
|
{
|
92
|
-
|
93
|
-
|
93
|
+
mCsvParser = rb_define_module("CsvParser");
|
94
|
+
rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
|
94
95
|
}
|
data/fastest-csv.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path('../lib/fastest-csv/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Maarten Oelering"]
|
6
6
|
gem.email = ["maarten@brightcode.nl"]
|
7
|
-
gem.description = %q{Fastest standard CSV parser for MRI Ruby}
|
8
|
-
gem.summary = %q{Fastest standard CSV parser for MRI Ruby}
|
7
|
+
gem.description = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
|
8
|
+
gem.summary = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
|
9
9
|
gem.homepage = "https://github.com/brightcode/fastest-csv"
|
10
10
|
|
11
11
|
gem.files = `git ls-files`.split($\)
|
@@ -14,6 +14,13 @@ Gem::Specification.new do |gem|
|
|
14
14
|
gem.name = "fastest-csv"
|
15
15
|
gem.require_paths = ["lib"]
|
16
16
|
gem.version = FastestCSV::VERSION
|
17
|
+
|
18
|
+
if RUBY_PLATFORM =~ /java/
|
19
|
+
gem.platform = "java"
|
20
|
+
gem.files << "lib/csv_parser.jar"
|
21
|
+
else
|
22
|
+
gem.extensions = ['ext/csv_parser/extconf.rb']
|
23
|
+
end
|
17
24
|
|
18
|
-
gem.
|
25
|
+
gem.add_development_dependency "rake-compiler"
|
19
26
|
end
|
data/lib/fastest-csv/version.rb
CHANGED
data/lib/fastest_csv.rb
CHANGED
@@ -1,17 +1,30 @@
|
|
1
|
+
# This loads either csv_parser.so, csv_parser.bundle or
|
2
|
+
# csv_parser.jar, depending on your Ruby platform and OS
|
1
3
|
require 'csv_parser'
|
2
4
|
require 'stringio'
|
3
5
|
|
6
|
+
# Fast CSV parser using native code
|
4
7
|
class FastestCSV
|
8
|
+
include Enumerable
|
5
9
|
|
6
|
-
|
10
|
+
if RUBY_PLATFORM =~ /java/
|
11
|
+
if JRUBY_VERSION =~ /^1\.[0-6]/
|
12
|
+
require 'jruby'
|
13
|
+
org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
|
14
|
+
else
|
15
|
+
include_package "org.brightcode"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
# Pass each line of the specified +path+ as array to the provided +block+
|
7
20
|
def self.foreach(path, &block)
|
8
21
|
open(path) do |reader|
|
9
22
|
reader.each(&block)
|
10
23
|
end
|
11
24
|
end
|
12
25
|
|
13
|
-
#
|
14
|
-
# or return
|
26
|
+
# Opens a csv file. Pass a FastestCSV instance to the provided block,
|
27
|
+
# or return it when no block is provided
|
15
28
|
def self.open(path, mode = "rb")
|
16
29
|
csv = new(File.open(path, mode))
|
17
30
|
if block_given?
|
@@ -25,14 +38,17 @@ class FastestCSV
|
|
25
38
|
end
|
26
39
|
end
|
27
40
|
|
41
|
+
# Read all lines from the specified +path+ into an array of arrays
|
28
42
|
def self.read(path)
|
29
43
|
open(path, "rb") { |csv| csv.read }
|
30
44
|
end
|
31
45
|
|
46
|
+
# Alias for FastestCSV.read
|
32
47
|
def self.readlines(path)
|
33
48
|
read(path)
|
34
49
|
end
|
35
50
|
|
51
|
+
# Read all lines from the specified String into an array of arrays
|
36
52
|
def self.parse(data, &block)
|
37
53
|
csv = new(StringIO.new(data))
|
38
54
|
if block.nil?
|
@@ -45,27 +61,44 @@ class FastestCSV
|
|
45
61
|
csv.each(&block)
|
46
62
|
end
|
47
63
|
end
|
64
|
+
|
65
|
+
def self.parse_line(line)
|
66
|
+
CsvParser.parse_line(line)
|
67
|
+
end
|
48
68
|
|
69
|
+
# Create new FastestCSV wrapping the specified IO object
|
49
70
|
def initialize(io)
|
50
71
|
@io = io
|
51
72
|
end
|
52
73
|
|
74
|
+
# Read from the wrapped IO passing each line as array to the specified block
|
53
75
|
def each
|
54
|
-
|
55
|
-
|
76
|
+
if block_given?
|
77
|
+
while row = shift
|
78
|
+
yield row
|
79
|
+
end
|
80
|
+
else
|
81
|
+
to_enum # return enumerator
|
56
82
|
end
|
57
83
|
end
|
58
84
|
|
85
|
+
# Read all remaining lines from the wrapped IO into an array of arrays
|
59
86
|
def read
|
60
87
|
table = Array.new
|
61
88
|
each {|row| table << row}
|
62
89
|
table
|
63
90
|
end
|
64
91
|
alias_method :readlines, :read
|
92
|
+
|
93
|
+
# Rewind the underlying IO object and reset line counter
|
94
|
+
def rewind
|
95
|
+
@io.rewind
|
96
|
+
end
|
65
97
|
|
98
|
+
# Read next line from the wrapped IO and return as array or nil at EOF
|
66
99
|
def shift
|
67
100
|
if line = @io.gets
|
68
|
-
|
101
|
+
CsvParser.parse_line(line)
|
69
102
|
else
|
70
103
|
nil
|
71
104
|
end
|
@@ -73,6 +106,7 @@ class FastestCSV
|
|
73
106
|
alias_method :gets, :shift
|
74
107
|
alias_method :readline, :shift
|
75
108
|
|
109
|
+
# Close the wrapped IO
|
76
110
|
def close
|
77
111
|
@io.close
|
78
112
|
end
|
@@ -83,9 +117,9 @@ class FastestCSV
|
|
83
117
|
end
|
84
118
|
|
85
119
|
class String
|
86
|
-
# Equivalent to <tt>FasterCSV::parse_line(self
|
120
|
+
# Equivalent to <tt>FasterCSV::parse_line(self)</tt>
|
87
121
|
def parse_csv
|
88
|
-
|
122
|
+
CsvParser.parse_line(self)
|
89
123
|
end
|
90
124
|
end
|
91
125
|
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -13,11 +13,15 @@ require 'fastest_csv'
|
|
13
13
|
#
|
14
14
|
class TestCSVParsing < Test::Unit::TestCase
|
15
15
|
|
16
|
+
if RUBY_PLATFORM =~ /java/
|
17
|
+
include_package "org.brightcode"
|
18
|
+
end
|
19
|
+
|
16
20
|
def test_mastering_regex_example
|
17
21
|
ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
|
18
22
|
assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
|
19
23
|
"It's \"10 Grand\", baby", "10K" ],
|
20
|
-
|
24
|
+
CsvParser.parse_line(ex) )
|
21
25
|
end
|
22
26
|
|
23
27
|
# Pulled from: http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
|
@@ -49,7 +53,7 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
49
53
|
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
50
54
|
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
|
51
55
|
[";,;", [";", ";"]] ].each do |csv_test|
|
52
|
-
assert_equal(csv_test.last,
|
56
|
+
assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
|
53
57
|
end
|
54
58
|
|
55
59
|
[ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
|
@@ -68,7 +72,7 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
68
72
|
["foo,bar", ["foo", "bar"]],
|
69
73
|
["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
|
70
74
|
["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |csv_test|
|
71
|
-
assert_equal(csv_test.last,
|
75
|
+
assert_equal(csv_test.last, CsvParser.parse_line(csv_test.first))
|
72
76
|
end
|
73
77
|
end
|
74
78
|
|
@@ -91,20 +95,20 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
91
95
|
[%Q{,"\r"}, [nil,"\r"]],
|
92
96
|
[%Q{"\r\n,"}, ["\r\n,"]],
|
93
97
|
[%Q{"\r\n,",}, ["\r\n,", nil]] ].each do |edge_case|
|
94
|
-
assert_equal(edge_case.last,
|
98
|
+
assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
|
95
99
|
end
|
96
100
|
end
|
97
101
|
|
98
102
|
def test_james_edge_cases
|
99
103
|
# A read at eof? should return nil.
|
100
|
-
assert_equal(nil,
|
104
|
+
assert_equal(nil, CsvParser.parse_line(""))
|
101
105
|
#
|
102
106
|
# With CSV it's impossible to tell an empty line from a line containing a
|
103
107
|
# single +nil+ field. The standard CSV library returns <tt>[nil]</tt>
|
104
108
|
# in these cases, but <tt>Array.new</tt> makes more sense to me.
|
105
109
|
#
|
106
110
|
#assert_equal(Array.new, FastestCSV.parse_line("\n1,2,3\n"))
|
107
|
-
assert_equal([nil],
|
111
|
+
assert_equal([nil], CsvParser.parse_line("\n1,2,3\n"))
|
108
112
|
end
|
109
113
|
|
110
114
|
def test_rob_edge_cases
|
@@ -119,7 +123,7 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
119
123
|
[%Q{"a\r\n\r\na","two CRLFs"}, ["a\r\n\r\na", 'two CRLFs']],
|
120
124
|
[%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]],
|
121
125
|
].each do |edge_case|
|
122
|
-
assert_equal(edge_case.last,
|
126
|
+
assert_equal(edge_case.last, CsvParser.parse_line(edge_case.first))
|
123
127
|
end
|
124
128
|
end
|
125
129
|
|
data/test/tc_interface.rb
CHANGED
@@ -116,4 +116,13 @@ class TestFastestCSVInterface < Test::Unit::TestCase
|
|
116
116
|
@expected = [%w{1 2} + ['3' * long_field_length]]
|
117
117
|
test_shift
|
118
118
|
end
|
119
|
+
|
120
|
+
def test_enumerable
|
121
|
+
FastestCSV.open(@path) do |csv|
|
122
|
+
assert(csv.include?(["1", "2", "3"]))
|
123
|
+
csv.rewind
|
124
|
+
assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
119
128
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastest-csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,9 +9,25 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
13
|
-
dependencies:
|
14
|
-
|
12
|
+
date: 2013-08-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :development
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Fastest standard CSV parser for MRI Ruby and JRuby
|
15
31
|
email:
|
16
32
|
- maarten@brightcode.nl
|
17
33
|
executables: []
|
@@ -24,6 +40,8 @@ files:
|
|
24
40
|
- LICENSE
|
25
41
|
- README.md
|
26
42
|
- Rakefile
|
43
|
+
- ext/csv_parser/CsvParser.java
|
44
|
+
- ext/csv_parser/CsvParserService.java
|
27
45
|
- ext/csv_parser/extconf.rb
|
28
46
|
- ext/csv_parser/parser.c
|
29
47
|
- fastest-csv.gemspec
|
@@ -52,10 +70,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
52
70
|
version: '0'
|
53
71
|
requirements: []
|
54
72
|
rubyforge_project:
|
55
|
-
rubygems_version: 1.8.
|
73
|
+
rubygems_version: 1.8.25
|
56
74
|
signing_key:
|
57
75
|
specification_version: 3
|
58
|
-
summary: Fastest standard CSV parser for MRI Ruby
|
76
|
+
summary: Fastest standard CSV parser for MRI Ruby and JRuby
|
59
77
|
test_files:
|
60
78
|
- test/tc_csv_parsing.rb
|
61
79
|
- test/tc_interface.rb
|