fastest-csv 0.0.2-java → 0.0.4-java
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/csv_parser/CsvParser.java +89 -0
- data/ext/csv_parser/CsvParserService.java +4 -1
- data/ext/csv_parser/parser.c +1 -9
- data/lib/fastest-csv/version.rb +1 -1
- data/lib/fastest_csv.rb +21 -7
- data/test/tc_csv_parsing.rb +4 -0
- data/test/tc_interface.rb +9 -0
- metadata +62 -63
@@ -0,0 +1,89 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
//
|
4
|
+
|
5
|
+
package org.brightcode;
|
6
|
+
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
/**
 * Parser for a single line of comma-separated values.
 *
 * <p>Fields are separated by {@code ,}. A field may be enclosed in double
 * quotes, in which case it may contain commas, line breaks and doubled
 * quotes ({@code ""}), the latter standing for one literal quote character.
 */
public class CsvParser {

  /** Initial capacity of the list that collects the parsed fields. */
  private static final int DEF_ARRAY_LEN = 32;

  /** Parser state: outside of any quoted section. */
  private static final int UNQUOTED = 0;
  /** Parser state: inside a quoted section. */
  private static final int IN_QUOTED = 1;
  /** Parser state: a quote was just seen while inside a quoted section. */
  private static final int QUOTE_IN_QUOTED = 2;

  /**
   * Splits the first record found in {@code line} into its fields.
   *
   * <p>Parsing stops at the first {@code \r} or {@code \n} that is not inside
   * a quoted section, so only the first line of multi-line input is parsed.
   * An unquoted empty field is reported as {@code null}, whereas a quoted
   * empty field ({@code ""}) is reported as the empty string. If the input
   * ends while a quoted section is still open, that last field is not added
   * to the result.
   *
   * @param line the text to parse; must not be {@code null}
   * @return the list of field values, or {@code null} if {@code line} is empty
   * @throws NullPointerException if {@code line} is {@code null}
   */
  public static List<String> parseLine(String line) {
    int length = line.length();
    if (length == 0) {
      return null;
    }

    int state = UNQUOTED;
    StringBuilder value = new StringBuilder(length); // field value, no longer than line
    List<String> array = new ArrayList<String>(DEF_ARRAY_LEN);

    scan:
    for (int i = 0; i < length; i++) {
      char c = line.charAt(i);
      switch (c) {
        case ',':
          if (state == IN_QUOTED) {
            value.append(c); // comma is ordinary data inside quotes
          } else {
            array.add(takeField(value, state == QUOTE_IN_QUOTED));
            state = UNQUOTED;
          }
          break;
        case '"':
          if (state == UNQUOTED) {
            state = IN_QUOTED;
          } else if (state == IN_QUOTED) {
            // either the closing quote or the first half of an escaped quote
            state = QUOTE_IN_QUOTED;
          } else {
            value.append(c); // escaped quote
            state = IN_QUOTED;
          }
          break;
        case '\r':
        case '\n':
          if (state != IN_QUOTED) {
            break scan; // only parse first line if multiline
          }
          value.append(c);
          break;
        default:
          value.append(c);
          break;
      }
    }

    // Emit the trailing field, unless a quoted section was left open.
    if (state != IN_QUOTED) {
      array.add(takeField(value, state == QUOTE_IN_QUOTED));
    }
    return array;
  }

  /**
   * Returns the accumulated field text and clears the buffer for the next field.
   *
   * @param value  buffer holding the characters of the current field
   * @param quoted whether the field ended right after a closing quote
   * @return {@code null} for an unquoted empty field, otherwise the buffer contents
   */
  private static String takeField(StringBuilder value, boolean quoted) {
    if (!quoted && value.length() == 0) {
      return null;
    }
    String field = value.toString();
    value.setLength(0);
    return field;
  }
}
|
@@ -19,6 +19,8 @@ public class CsvParserService implements BasicLibraryService {
|
|
19
19
|
|
20
20
|
private Ruby runtime;
|
21
21
|
|
22
|
+
private static int DEF_ARRAY_LEN = 32;
|
23
|
+
|
22
24
|
private static int UNQUOTED = 0;
|
23
25
|
private static int IN_QUOTED = 1;
|
24
26
|
private static int QUOTE_IN_QUOTED = 2;
|
@@ -29,6 +31,7 @@ public class CsvParserService implements BasicLibraryService {
|
|
29
31
|
this.runtime = runtime;
|
30
32
|
|
31
33
|
RubyModule mCsvParser = runtime.defineModule("CsvParser");
|
34
|
+
// TODO: CallbackFactory#getSingletonMethod is deprecated
|
32
35
|
CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
|
33
36
|
mCsvParser.defineModuleFunction("parse_line",
|
34
37
|
callbackFactory.getSingletonMethod("parseLine", RubyString.class));
|
@@ -45,7 +48,7 @@ public class CsvParserService implements BasicLibraryService {
|
|
45
48
|
|
46
49
|
int state = UNQUOTED;
|
47
50
|
StringBuilder value = new StringBuilder(length); // field value, no longer than line
|
48
|
-
RubyArray array = RubyArray.newArray(runtime,
|
51
|
+
RubyArray array = RubyArray.newArray(runtime, DEF_ARRAY_LEN);
|
49
52
|
|
50
53
|
for (int i = 0; i < length; i++) {
|
51
54
|
char c = seq.charAt(i);
|
data/ext/csv_parser/parser.c
CHANGED
@@ -9,16 +9,12 @@
|
|
9
9
|
#include "ruby/io.h"
|
10
10
|
#endif
|
11
11
|
|
12
|
-
/* default allocated size is 16 */
|
13
12
|
#define DEF_ARRAY_LEN 32
|
14
13
|
|
15
14
|
#define UNQUOTED 0
|
16
15
|
#define IN_QUOTED 1
|
17
16
|
#define QUOTE_IN_QUOTED 2
|
18
17
|
|
19
|
-
/*
|
20
|
-
static VALUE cFastestCSV;
|
21
|
-
*/
|
22
18
|
static VALUE mCsvParser;
|
23
19
|
|
24
20
|
static VALUE parse_line(VALUE self, VALUE str)
|
@@ -32,7 +28,7 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
32
28
|
if (len == 0)
|
33
29
|
return Qnil;
|
34
30
|
|
35
|
-
VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
|
31
|
+
VALUE array = rb_ary_new2(DEF_ARRAY_LEN); /* default allocated size is 16 */
|
36
32
|
char value[len]; /* field value, no longer than line */
|
37
33
|
int state = 0;
|
38
34
|
int index = 0;
|
@@ -94,10 +90,6 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
94
90
|
|
95
91
|
void Init_csv_parser()
|
96
92
|
{
|
97
|
-
/*
|
98
|
-
cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
|
99
|
-
rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
|
100
|
-
*/
|
101
93
|
mCsvParser = rb_define_module("CsvParser");
|
102
94
|
rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
|
103
95
|
}
|
data/lib/fastest-csv/version.rb
CHANGED
data/lib/fastest_csv.rb
CHANGED
@@ -5,10 +5,15 @@ require 'stringio'
|
|
5
5
|
|
6
6
|
# Fast CSV parser using native code
|
7
7
|
class FastestCSV
|
8
|
+
include Enumerable
|
8
9
|
|
9
10
|
if RUBY_PLATFORM =~ /java/
|
10
|
-
|
11
|
-
|
11
|
+
if JRUBY_VERSION =~ /^1\.[0-6]/
|
12
|
+
require 'jruby'
|
13
|
+
org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
|
14
|
+
else
|
15
|
+
include_package "org.brightcode"
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
# Pass each line of the specified +path+ as array to the provided +block+
|
@@ -58,7 +63,7 @@ class FastestCSV
|
|
58
63
|
end
|
59
64
|
|
60
65
|
def self.parse_line(line)
|
61
|
-
|
66
|
+
CsvParser.parse_line(line)
|
62
67
|
end
|
63
68
|
|
64
69
|
# Create new FastestCSV wrapping the specified IO object
|
@@ -68,8 +73,12 @@ class FastestCSV
|
|
68
73
|
|
69
74
|
# Read from the wrapped IO passing each line as array to the specified block
|
70
75
|
def each
|
71
|
-
|
72
|
-
|
76
|
+
if block_given?
|
77
|
+
while row = shift
|
78
|
+
yield row
|
79
|
+
end
|
80
|
+
else
|
81
|
+
to_enum # return enumerator
|
73
82
|
end
|
74
83
|
end
|
75
84
|
|
@@ -80,11 +89,16 @@ class FastestCSV
|
|
80
89
|
table
|
81
90
|
end
|
82
91
|
alias_method :readlines, :read
|
92
|
+
|
93
|
+
# Rewind the underlying IO object and reset line counter
|
94
|
+
def rewind
|
95
|
+
@io.rewind
|
96
|
+
end
|
83
97
|
|
84
98
|
# Read next line from the wrapped IO and return as array or nil at EOF
|
85
99
|
def shift
|
86
100
|
if line = @io.gets
|
87
|
-
|
101
|
+
CsvParser.parse_line(line)
|
88
102
|
else
|
89
103
|
nil
|
90
104
|
end
|
@@ -105,7 +119,7 @@ end
|
|
105
119
|
class String
|
106
120
|
# Equivalent to <tt>FastestCSV::parse_line(self)</tt>
|
107
121
|
def parse_csv
|
108
|
-
|
122
|
+
CsvParser.parse_line(self)
|
109
123
|
end
|
110
124
|
end
|
111
125
|
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -13,6 +13,10 @@ require 'fastest_csv'
|
|
13
13
|
#
|
14
14
|
class TestCSVParsing < Test::Unit::TestCase
|
15
15
|
|
16
|
+
if RUBY_PLATFORM =~ /java/
|
17
|
+
include_package "org.brightcode"
|
18
|
+
end
|
19
|
+
|
16
20
|
def test_mastering_regex_example
|
17
21
|
ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
|
18
22
|
assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
|
data/test/tc_interface.rb
CHANGED
@@ -116,4 +116,13 @@ class TestFastestCSVInterface < Test::Unit::TestCase
|
|
116
116
|
@expected = [%w{1 2} + ['3' * long_field_length]]
|
117
117
|
test_shift
|
118
118
|
end
|
119
|
+
|
120
|
+
def test_enumerable
|
121
|
+
FastestCSV.open(@path) do |csv|
|
122
|
+
assert(csv.include?(["1", "2", "3"]))
|
123
|
+
csv.rewind
|
124
|
+
assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
119
128
|
end
|
metadata
CHANGED
@@ -1,80 +1,79 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastest-csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
version: 0.0.
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.4
|
6
6
|
platform: java
|
7
|
-
authors:
|
8
|
-
|
9
|
-
autorequire:
|
7
|
+
authors:
|
8
|
+
- Maarten Oelering
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
12
|
+
date: 2013-08-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
version_requirements: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
none: false
|
22
|
+
requirement: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
none: false
|
28
|
+
prerelease: false
|
29
|
+
type: :development
|
26
30
|
description: Fastest standard CSV parser for MRI Ruby and JRuby
|
27
|
-
email:
|
28
|
-
|
31
|
+
email:
|
32
|
+
- maarten@brightcode.nl
|
29
33
|
executables: []
|
30
|
-
|
31
34
|
extensions: []
|
32
|
-
|
33
35
|
extra_rdoc_files: []
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
36
|
+
files:
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- LICENSE
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- ext/csv_parser/CsvParser.java
|
43
|
+
- ext/csv_parser/CsvParserService.java
|
44
|
+
- ext/csv_parser/extconf.rb
|
45
|
+
- ext/csv_parser/parser.c
|
46
|
+
- fastest-csv.gemspec
|
47
|
+
- lib/fastest-csv.rb
|
48
|
+
- lib/fastest-csv/version.rb
|
49
|
+
- lib/fastest_csv.rb
|
50
|
+
- test/tc_csv_parsing.rb
|
51
|
+
- test/tc_interface.rb
|
52
|
+
- lib/csv_parser.jar
|
51
53
|
homepage: https://github.com/brightcode/fastest-csv
|
52
54
|
licenses: []
|
53
|
-
|
54
|
-
post_install_message:
|
55
|
+
post_install_message:
|
55
56
|
rdoc_options: []
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
60
64
|
none: false
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
66
70
|
none: false
|
67
|
-
requirements:
|
68
|
-
- - ">="
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
version: "0"
|
71
71
|
requirements: []
|
72
|
-
|
73
|
-
rubyforge_project:
|
72
|
+
rubyforge_project:
|
74
73
|
rubygems_version: 1.8.24
|
75
|
-
signing_key:
|
74
|
+
signing_key:
|
76
75
|
specification_version: 3
|
77
76
|
summary: Fastest standard CSV parser for MRI Ruby and JRuby
|
78
|
-
test_files:
|
79
|
-
|
80
|
-
|
77
|
+
test_files:
|
78
|
+
- test/tc_csv_parsing.rb
|
79
|
+
- test/tc_interface.rb
|