fastest-csv 0.0.2-java → 0.0.4-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/csv_parser/CsvParser.java +89 -0
- data/ext/csv_parser/CsvParserService.java +4 -1
- data/ext/csv_parser/parser.c +1 -9
- data/lib/fastest-csv/version.rb +1 -1
- data/lib/fastest_csv.rb +21 -7
- data/test/tc_csv_parsing.rb +4 -0
- data/test/tc_interface.rb +9 -0
- metadata +62 -63
@@ -0,0 +1,89 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
//
|
4
|
+
|
5
|
+
package org.brightcode;
|
6
|
+
|
7
|
+
import java.util.ArrayList;
|
8
|
+
import java.util.List;
|
9
|
+
|
10
|
+
/**
 * Minimal single-line CSV parser driven by a three-state machine.
 *
 * <p>Fields are separated by commas. A field may be wrapped in double quotes,
 * in which case commas and line breaks inside it are kept literally and a
 * doubled quote ({@code ""}) stands for one quote character.
 */
public class CsvParser {

    /** Initial capacity of the list that collects the parsed fields. */
    private static final int DEF_ARRAY_LEN = 32;

    /** Outside of any quoted section. */
    private static final int UNQUOTED = 0;
    /** Inside a quoted section; separators and line breaks are literal. */
    private static final int IN_QUOTED = 1;
    /** Just saw a quote inside a quoted section: either its end or the first half of an escaped quote. */
    private static final int QUOTE_IN_QUOTED = 2;

    /**
     * Splits one CSV line into its fields.
     *
     * <p>Parsing stops at the first carriage return or line feed that is not
     * inside a quoted section, so only the first record of a multi-line string
     * is returned. An empty unquoted field is reported as {@code null}; an empty
     * quoted field ({@code ""}) is reported as the empty string. If the input
     * ends while a quoted section is still open, the unfinished field is not
     * added to the result.
     *
     * <p>The raw {@code List} return type is kept for source compatibility with
     * existing callers; every element is a {@code String} or {@code null}.
     *
     * @param line the text to parse; must not be {@code null}
     * @return the list of field values, or {@code null} if {@code line} is empty
     */
    public static List parseLine(String line) {
        int length = line.length();
        if (length == 0) {
            return null;
        }

        int state = UNQUOTED;
        StringBuilder value = new StringBuilder(length); // field value, no longer than line
        List<String> fields = new ArrayList<String>(DEF_ARRAY_LEN);

        scan:
        for (int i = 0; i < length; i++) {
            char c = line.charAt(i);
            switch (c) {
                case ',':
                    if (state == IN_QUOTED) {
                        value.append(c); // separator is literal inside quotes
                    } else {
                        addField(fields, value, state);
                        state = UNQUOTED;
                    }
                    break;
                case '"':
                    if (state == UNQUOTED) {
                        state = IN_QUOTED;
                    } else if (state == IN_QUOTED) {
                        state = QUOTE_IN_QUOTED;
                    } else if (state == QUOTE_IN_QUOTED) {
                        value.append(c); // escaped quote
                        state = IN_QUOTED;
                    }
                    break;
                case '\r':
                case '\n':
                    if (state == IN_QUOTED) {
                        value.append(c);
                    } else {
                        break scan; // only parse first line if multiline
                    }
                    break;
                default:
                    value.append(c);
                    break;
            }
        }

        // Flush the trailing field unless a quoted section was left open.
        if (state != IN_QUOTED) {
            addField(fields, value, state);
        }
        return fields;
    }

    /**
     * Appends the pending field to {@code fields} and clears the buffer.
     * An empty buffer in the {@code UNQUOTED} state is stored as {@code null}.
     */
    private static void addField(List<String> fields, StringBuilder value, int state) {
        if (state == UNQUOTED && value.length() == 0) {
            fields.add(null);
        } else {
            fields.add(value.toString());
            value.setLength(0);
        }
    }
}
|
@@ -19,6 +19,8 @@ public class CsvParserService implements BasicLibraryService {
|
|
19
19
|
|
20
20
|
private Ruby runtime;
|
21
21
|
|
22
|
+
private static int DEF_ARRAY_LEN = 32;
|
23
|
+
|
22
24
|
private static int UNQUOTED = 0;
|
23
25
|
private static int IN_QUOTED = 1;
|
24
26
|
private static int QUOTE_IN_QUOTED = 2;
|
@@ -29,6 +31,7 @@ public class CsvParserService implements BasicLibraryService {
|
|
29
31
|
this.runtime = runtime;
|
30
32
|
|
31
33
|
RubyModule mCsvParser = runtime.defineModule("CsvParser");
|
34
|
+
// TODO: CallbackFactory#getSingletonMethod is deprecated
|
32
35
|
CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
|
33
36
|
mCsvParser.defineModuleFunction("parse_line",
|
34
37
|
callbackFactory.getSingletonMethod("parseLine", RubyString.class));
|
@@ -45,7 +48,7 @@ public class CsvParserService implements BasicLibraryService {
|
|
45
48
|
|
46
49
|
int state = UNQUOTED;
|
47
50
|
StringBuilder value = new StringBuilder(length); // field value, no longer than line
|
48
|
-
RubyArray array = RubyArray.newArray(runtime,
|
51
|
+
RubyArray array = RubyArray.newArray(runtime, DEF_ARRAY_LEN);
|
49
52
|
|
50
53
|
for (int i = 0; i < length; i++) {
|
51
54
|
char c = seq.charAt(i);
|
data/ext/csv_parser/parser.c
CHANGED
@@ -9,16 +9,12 @@
|
|
9
9
|
#include "ruby/io.h"
|
10
10
|
#endif
|
11
11
|
|
12
|
-
/* default allocated size is 16 */
|
13
12
|
#define DEF_ARRAY_LEN 32
|
14
13
|
|
15
14
|
#define UNQUOTED 0
|
16
15
|
#define IN_QUOTED 1
|
17
16
|
#define QUOTE_IN_QUOTED 2
|
18
17
|
|
19
|
-
/*
|
20
|
-
static VALUE cFastestCSV;
|
21
|
-
*/
|
22
18
|
static VALUE mCsvParser;
|
23
19
|
|
24
20
|
static VALUE parse_line(VALUE self, VALUE str)
|
@@ -32,7 +28,7 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
32
28
|
if (len == 0)
|
33
29
|
return Qnil;
|
34
30
|
|
35
|
-
VALUE array = rb_ary_new2(DEF_ARRAY_LEN);
|
31
|
+
VALUE array = rb_ary_new2(DEF_ARRAY_LEN); /* default allocated size is 16 */
|
36
32
|
char value[len]; /* field value, no longer than line */
|
37
33
|
int state = 0;
|
38
34
|
int index = 0;
|
@@ -94,10 +90,6 @@ static VALUE parse_line(VALUE self, VALUE str)
|
|
94
90
|
|
95
91
|
void Init_csv_parser()
|
96
92
|
{
|
97
|
-
/*
|
98
|
-
cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
|
99
|
-
rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
|
100
|
-
*/
|
101
93
|
mCsvParser = rb_define_module("CsvParser");
|
102
94
|
rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
|
103
95
|
}
|
data/lib/fastest-csv/version.rb
CHANGED
data/lib/fastest_csv.rb
CHANGED
@@ -5,10 +5,15 @@ require 'stringio'
|
|
5
5
|
|
6
6
|
# Fast CSV parser using native code
|
7
7
|
class FastestCSV
|
8
|
+
include Enumerable
|
8
9
|
|
9
10
|
if RUBY_PLATFORM =~ /java/
|
10
|
-
|
11
|
-
|
11
|
+
if JRUBY_VERSION =~ /^1\.[0-6]/
|
12
|
+
require 'jruby'
|
13
|
+
org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
|
14
|
+
else
|
15
|
+
include_package "org.brightcode"
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
19
|
# Pass each line of the specified +path+ as array to the provided +block+
|
@@ -58,7 +63,7 @@ class FastestCSV
|
|
58
63
|
end
|
59
64
|
|
60
65
|
def self.parse_line(line)
|
61
|
-
|
66
|
+
CsvParser.parse_line(line)
|
62
67
|
end
|
63
68
|
|
64
69
|
# Create new FastestCSV wrapping the specified IO object
|
@@ -68,8 +73,12 @@ class FastestCSV
|
|
68
73
|
|
69
74
|
# Read from the wrapped IO passing each line as array to the specified block
|
70
75
|
def each
|
71
|
-
|
72
|
-
|
76
|
+
if block_given?
|
77
|
+
while row = shift
|
78
|
+
yield row
|
79
|
+
end
|
80
|
+
else
|
81
|
+
to_enum # return enumerator
|
73
82
|
end
|
74
83
|
end
|
75
84
|
|
@@ -80,11 +89,16 @@ class FastestCSV
|
|
80
89
|
table
|
81
90
|
end
|
82
91
|
alias_method :readlines, :read
|
92
|
+
|
93
|
+
# Rewind the underlying IO object and reset line counter
|
94
|
+
def rewind
|
95
|
+
@io.rewind
|
96
|
+
end
|
83
97
|
|
84
98
|
# Read next line from the wrapped IO and return as array or nil at EOF
|
85
99
|
def shift
|
86
100
|
if line = @io.gets
|
87
|
-
|
101
|
+
CsvParser.parse_line(line)
|
88
102
|
else
|
89
103
|
nil
|
90
104
|
end
|
@@ -105,7 +119,7 @@ end
|
|
105
119
|
class String
|
106
120
|
# Equivalent to <tt>FastestCSV::parse_line(self)</tt>
|
107
121
|
def parse_csv
|
108
|
-
|
122
|
+
CsvParser.parse_line(self)
|
109
123
|
end
|
110
124
|
end
|
111
125
|
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -13,6 +13,10 @@ require 'fastest_csv'
|
|
13
13
|
#
|
14
14
|
class TestCSVParsing < Test::Unit::TestCase
|
15
15
|
|
16
|
+
if RUBY_PLATFORM =~ /java/
|
17
|
+
include_package "org.brightcode"
|
18
|
+
end
|
19
|
+
|
16
20
|
def test_mastering_regex_example
|
17
21
|
ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
|
18
22
|
assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
|
data/test/tc_interface.rb
CHANGED
@@ -116,4 +116,13 @@ class TestFastestCSVInterface < Test::Unit::TestCase
|
|
116
116
|
@expected = [%w{1 2} + ['3' * long_field_length]]
|
117
117
|
test_shift
|
118
118
|
end
|
119
|
+
|
120
|
+
def test_enumerable
|
121
|
+
FastestCSV.open(@path) do |csv|
|
122
|
+
assert(csv.include?(["1", "2", "3"]))
|
123
|
+
csv.rewind
|
124
|
+
assert_equal([["1", "2", "3"], ["4", "5"]], csv.to_a)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
119
128
|
end
|
metadata
CHANGED
@@ -1,80 +1,79 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastest-csv
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
prerelease:
|
5
|
-
version: 0.0.
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.4
|
6
6
|
platform: java
|
7
|
-
authors:
|
8
|
-
|
9
|
-
autorequire:
|
7
|
+
authors:
|
8
|
+
- Maarten Oelering
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
12
|
+
date: 2013-08-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake-compiler
|
16
|
+
version_requirements: !ruby/object:Gem::Requirement
|
17
|
+
requirements:
|
18
|
+
- - '>='
|
19
|
+
- !ruby/object:Gem::Version
|
20
|
+
version: '0'
|
21
|
+
none: false
|
22
|
+
requirement: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
none: false
|
28
|
+
prerelease: false
|
29
|
+
type: :development
|
26
30
|
description: Fastest standard CSV parser for MRI Ruby and JRuby
|
27
|
-
email:
|
28
|
-
|
31
|
+
email:
|
32
|
+
- maarten@brightcode.nl
|
29
33
|
executables: []
|
30
|
-
|
31
34
|
extensions: []
|
32
|
-
|
33
35
|
extra_rdoc_files: []
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
36
|
+
files:
|
37
|
+
- .gitignore
|
38
|
+
- Gemfile
|
39
|
+
- LICENSE
|
40
|
+
- README.md
|
41
|
+
- Rakefile
|
42
|
+
- ext/csv_parser/CsvParser.java
|
43
|
+
- ext/csv_parser/CsvParserService.java
|
44
|
+
- ext/csv_parser/extconf.rb
|
45
|
+
- ext/csv_parser/parser.c
|
46
|
+
- fastest-csv.gemspec
|
47
|
+
- lib/fastest-csv.rb
|
48
|
+
- lib/fastest-csv/version.rb
|
49
|
+
- lib/fastest_csv.rb
|
50
|
+
- test/tc_csv_parsing.rb
|
51
|
+
- test/tc_interface.rb
|
52
|
+
- lib/csv_parser.jar
|
51
53
|
homepage: https://github.com/brightcode/fastest-csv
|
52
54
|
licenses: []
|
53
|
-
|
54
|
-
post_install_message:
|
55
|
+
post_install_message:
|
55
56
|
rdoc_options: []
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - '>='
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
60
64
|
none: false
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
66
70
|
none: false
|
67
|
-
requirements:
|
68
|
-
- - ">="
|
69
|
-
- !ruby/object:Gem::Version
|
70
|
-
version: "0"
|
71
71
|
requirements: []
|
72
|
-
|
73
|
-
rubyforge_project:
|
72
|
+
rubyforge_project:
|
74
73
|
rubygems_version: 1.8.24
|
75
|
-
signing_key:
|
74
|
+
signing_key:
|
76
75
|
specification_version: 3
|
77
76
|
summary: Fastest standard CSV parser for MRI Ruby and JRuby
|
78
|
-
test_files:
|
79
|
-
|
80
|
-
|
77
|
+
test_files:
|
78
|
+
- test/tc_csv_parsing.rb
|
79
|
+
- test/tc_interface.rb
|