fastest-csv 0.0.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +20 -0
- data/Gemfile +4 -0
- data/LICENSE +22 -0
- data/README.md +68 -0
- data/Rakefile +22 -0
- data/ext/csv_parser/CsvParserService.java +112 -0
- data/ext/csv_parser/extconf.rb +15 -0
- data/ext/csv_parser/parser.c +103 -0
- data/fastest-csv.gemspec +26 -0
- data/lib/fastest-csv.rb +1 -0
- data/lib/fastest-csv/version.rb +3 -0
- data/lib/fastest_csv.rb +111 -0
- data/test/tc_csv_parsing.rb +126 -0
- data/test/tc_interface.rb +119 -0
- metadata +80 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2012 Maarten Oelering
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# FastestCSV
|
2
|
+
|
3
|
+
Fastest CSV class for MRI Ruby and JRuby. Faster than faster_csv and fasterer-csv.
|
4
|
+
|
5
|
+
Uses native C code to parse CSV lines in MRI Ruby and Java in JRuby.
|
6
|
+
|
7
|
+
Supports standard CSV according to RFC4180. Not the so-called "csv" from Excel.
|
8
|
+
|
9
|
+
The interface is a subset of the CSV interface in Ruby 1.9.3. The options parameter is not supported.
|
10
|
+
|
11
|
+
Originally developed to parse large CSV log files from PowerMTA.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
gem 'fastest-csv'
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install fastest-csv
|
26
|
+
|
27
|
+
## Usage
|
28
|
+
|
29
|
+
Parse single line
|
30
|
+
|
31
|
+
FastestCSV.parse_line("one,two,three")
|
32
|
+
=> ["one", "two", "three"]
|
33
|
+
|
34
|
+
"one,two,three".parse_csv
|
35
|
+
=> ["one", "two", "three"]
|
36
|
+
|
37
|
+
Parse file without header
|
38
|
+
|
39
|
+
FastestCSV.foreach("path/to/file.csv") do |row|
|
40
|
+
# row is the parsed line as an array of fields,
|
41
|
+
# e.g. ["one", "two", "three"]; an empty unquoted field is nil
|
42
|
+
#
|
43
|
+
end
|
44
|
+
|
45
|
+
Parse file with header
|
46
|
+
|
47
|
+
FastestCSV.open("path/to/file.csv") do |csv|
|
48
|
+
fields = csv.shift
|
49
|
+
while values = csv.shift
|
50
|
+
#
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
Parse file in array of arrays
|
55
|
+
|
56
|
+
rows = FastestCSV.read("path/to/file.csv")
|
57
|
+
|
58
|
+
Parse string in array of arrays
|
59
|
+
|
60
|
+
rows = FastestCSV.parse(csv_data)
|
61
|
+
|
62
|
+
## Contributing
|
63
|
+
|
64
|
+
1. Fork it
|
65
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
66
|
+
3. Commit your changes (`git commit -am 'Added some feature'`)
|
67
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
68
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
require "bundler/gem_tasks"
|
3
|
+
|
4
|
+
spec = Gem::Specification.load('fastest-csv.gemspec')
|
5
|
+
|
6
|
+
if RUBY_PLATFORM =~ /java/
|
7
|
+
require 'rake/javaextensiontask'
|
8
|
+
Rake::JavaExtensionTask.new('csv_parser', spec)
|
9
|
+
else
|
10
|
+
require 'rake/extensiontask'
|
11
|
+
Rake::ExtensionTask.new('csv_parser', spec)
|
12
|
+
end
|
13
|
+
|
14
|
+
require 'rake/testtask'
|
15
|
+
Rake::TestTask.new do |t|
|
16
|
+
t.libs << "test"
|
17
|
+
t.test_files = FileList['test/tc_*.rb']
|
18
|
+
#test.libs << 'lib' << 'test'
|
19
|
+
#test.pattern = 'test/**/test_*.rb'
|
20
|
+
#test.verbose = true
|
21
|
+
end
|
22
|
+
|
@@ -0,0 +1,112 @@
|
|
1
|
+
//
|
2
|
+
// Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
//
|
4
|
+
|
5
|
+
package org.brightcode;
|
6
|
+
|
7
|
+
import java.io.IOException;
|
8
|
+
|
9
|
+
import org.jruby.Ruby;
|
10
|
+
import org.jruby.RubyArray;
|
11
|
+
import org.jruby.RubyModule;
|
12
|
+
import org.jruby.RubyString;
|
13
|
+
import org.jruby.runtime.Block;
|
14
|
+
import org.jruby.runtime.CallbackFactory;
|
15
|
+
import org.jruby.runtime.builtin.IRubyObject;
|
16
|
+
import org.jruby.runtime.load.BasicLibraryService;
|
17
|
+
|
18
|
+
public class CsvParserService implements BasicLibraryService {
|
19
|
+
|
20
|
+
private Ruby runtime;
|
21
|
+
|
22
|
+
private static int UNQUOTED = 0;
|
23
|
+
private static int IN_QUOTED = 1;
|
24
|
+
private static int QUOTE_IN_QUOTED = 2;
|
25
|
+
|
26
|
+
// Initial setup function. Takes a reference to the current JRuby runtime and
|
27
|
+
// sets up our modules.
|
28
|
+
public boolean basicLoad(Ruby runtime) throws IOException {
|
29
|
+
this.runtime = runtime;
|
30
|
+
|
31
|
+
RubyModule mCsvParser = runtime.defineModule("CsvParser");
|
32
|
+
CallbackFactory callbackFactory = runtime.callbackFactory(CsvParserService.class);
|
33
|
+
mCsvParser.defineModuleFunction("parse_line",
|
34
|
+
callbackFactory.getSingletonMethod("parseLine", RubyString.class));
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
public static IRubyObject parseLine(IRubyObject recv, RubyString line, Block unusedBlock) {
|
39
|
+
Ruby runtime = recv.getRuntime();
|
40
|
+
|
41
|
+
CharSequence seq = line.getValue();
|
42
|
+
int length = seq.length();
|
43
|
+
if (length == 0)
|
44
|
+
return runtime.getNil();
|
45
|
+
|
46
|
+
int state = UNQUOTED;
|
47
|
+
StringBuilder value = new StringBuilder(length); // field value, no longer than line
|
48
|
+
RubyArray array = RubyArray.newArray(runtime, 36);
|
49
|
+
|
50
|
+
for (int i = 0; i < length; i++) {
|
51
|
+
char c = seq.charAt(i);
|
52
|
+
switch (c) {
|
53
|
+
case ',':
|
54
|
+
if (state == UNQUOTED) {
|
55
|
+
if (value.length() == 0) {
|
56
|
+
array.append(runtime.getNil());
|
57
|
+
}
|
58
|
+
else {
|
59
|
+
array.append(RubyString.newString(runtime, value));
|
60
|
+
value.setLength(0);
|
61
|
+
}
|
62
|
+
}
|
63
|
+
else if (state == IN_QUOTED) {
|
64
|
+
value.append(c);
|
65
|
+
}
|
66
|
+
else if (state == 2) {
|
67
|
+
array.append(RubyString.newString(runtime, value));
|
68
|
+
value.setLength(0);
|
69
|
+
state = UNQUOTED;
|
70
|
+
}
|
71
|
+
break;
|
72
|
+
case '"':
|
73
|
+
if (state == UNQUOTED) {
|
74
|
+
state = IN_QUOTED;
|
75
|
+
}
|
76
|
+
else if (state == IN_QUOTED) {
|
77
|
+
state = QUOTE_IN_QUOTED;
|
78
|
+
}
|
79
|
+
else if (state == QUOTE_IN_QUOTED) {
|
80
|
+
value.append(c); // escaped quote
|
81
|
+
state = IN_QUOTED;
|
82
|
+
}
|
83
|
+
break;
|
84
|
+
case '\r':
|
85
|
+
case '\n':
|
86
|
+
if (state == IN_QUOTED) {
|
87
|
+
value.append(c);
|
88
|
+
}
|
89
|
+
else {
|
90
|
+
i = length; // only parse first line if multiline
|
91
|
+
}
|
92
|
+
break;
|
93
|
+
default:
|
94
|
+
value.append(c);
|
95
|
+
break;
|
96
|
+
}
|
97
|
+
}
|
98
|
+
if (state == UNQUOTED) {
|
99
|
+
if (value.length() == 0) {
|
100
|
+
array.append(runtime.getNil());
|
101
|
+
}
|
102
|
+
else {
|
103
|
+
array.append(RubyString.newString(runtime, value));
|
104
|
+
value.setLength(0);
|
105
|
+
}
|
106
|
+
}
|
107
|
+
else if (state == QUOTE_IN_QUOTED) {
|
108
|
+
array.append(RubyString.newString(runtime, value));
|
109
|
+
}
|
110
|
+
return array;
|
111
|
+
}
|
112
|
+
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/ruby -w
# Generates the Makefile for the csv_parser C extension.

require 'mkmf'
extension_name = 'csv_parser'

# parser.c includes "rubyio.h" instead of "ruby/io.h" when RUBY_18 is defined.
# The pattern is anchored and the dots are escaped so that only the 1.8
# series matches; an unanchored /1.8/ would also match e.g. "2.1.8".
if RUBY_VERSION =~ /\A1\.8\./
  $CPPFLAGS += " -DRUBY_18"
end

create_makefile(extension_name)
|
@@ -0,0 +1,103 @@
|
|
1
|
+
/*
|
2
|
+
* Copyright (c) Maarten Oelering, BrightCode BV
|
3
|
+
*/
|
4
|
+
|
5
|
+
#include "ruby.h"
|
6
|
+
#ifdef RUBY_18
|
7
|
+
#include "rubyio.h"
|
8
|
+
#else
|
9
|
+
#include "ruby/io.h"
|
10
|
+
#endif
|
11
|
+
|
12
|
+
/* default allocated size is 16 */
|
13
|
+
#define DEF_ARRAY_LEN 32
|
14
|
+
|
15
|
+
#define UNQUOTED 0
|
16
|
+
#define IN_QUOTED 1
|
17
|
+
#define QUOTE_IN_QUOTED 2
|
18
|
+
|
19
|
+
/*
|
20
|
+
static VALUE cFastestCSV;
|
21
|
+
*/
|
22
|
+
static VALUE mCsvParser;
|
23
|
+
|
24
|
+
/*
 * CsvParser.parse_line(str) -> Array or nil
 *
 * Splits one CSV line into its fields. Returns nil when str is nil or empty.
 * An empty unquoted field becomes nil, an empty quoted field becomes "".
 * Parsing stops at the first CR or LF that is not inside a quoted section.
 * Raises TypeError when str is neither nil nor convertible to a String.
 *
 * NOTE(review): fields are created with rb_str_new, so they do not appear to
 * carry over the encoding of str - confirm whether callers rely on that.
 */
static VALUE parse_line(VALUE self, VALUE str)
{
    const char *ptr;
    long len;               /* long, so very long lines are not truncated */
    long i;
    long index = 0;         /* number of bytes collected for the current field */
    int state = UNQUOTED;
    char c;
    char *value;
    VALUE array;
    volatile VALUE scratch; /* volatile keeps the buffer visible to the GC */

    if (NIL_P(str))
        return Qnil;

    /* reject non-strings before their memory is read as string data */
    StringValue(str);

    ptr = RSTRING_PTR(str);
    len = RSTRING_LEN(str);
    if (len == 0)
        return Qnil;

    array = rb_ary_new2(DEF_ARRAY_LEN);

    /* field value, no longer than line; held in a GC-managed string buffer
       instead of a variable-length array on the C stack */
    scratch = rb_str_buf_new(len);
    value = RSTRING_PTR(scratch);

    for (i = 0; i < len; i++)
    {
        c = ptr[i];
        switch (c)
        {
            case ',':
                if (state == UNQUOTED) {
                    rb_ary_push(array, (index == 0 ? Qnil : rb_str_new(value, index)));
                    index = 0;
                }
                else if (state == IN_QUOTED) {
                    value[index++] = c; /* separator is literal inside quotes */
                }
                else if (state == QUOTE_IN_QUOTED) {
                    /* the previous quote closed the field */
                    rb_ary_push(array, rb_str_new(value, index));
                    index = 0;
                    state = UNQUOTED;
                }
                break;
            case '"':
                if (state == UNQUOTED) {
                    state = IN_QUOTED;
                }
                else if (state == IN_QUOTED) {
                    state = QUOTE_IN_QUOTED;
                }
                else if (state == QUOTE_IN_QUOTED) {
                    value[index++] = c; /* escaped quote */
                    state = IN_QUOTED;
                }
                break;
            case '\r':
            case '\n':
                if (state == IN_QUOTED) {
                    value[index++] = c;
                }
                else {
                    i = len; /* only parse first line if multiline */
                }
                break;
            default:
                value[index++] = c;
        }
    }

    /* Flush the last field. A line that ends while still IN_QUOTED
       (unterminated quote) contributes no final field. */
    if (state == UNQUOTED) {
        rb_ary_push(array, (index == 0 ? Qnil : rb_str_new(value, index)));
    }
    else if (state == QUOTE_IN_QUOTED) {
        rb_ary_push(array, rb_str_new(value, index));
    }
    return array;
}
|
94
|
+
|
95
|
+
void Init_csv_parser()
|
96
|
+
{
|
97
|
+
/*
|
98
|
+
cFastestCSV = rb_define_class("FastestCSV", rb_cObject);
|
99
|
+
rb_define_singleton_method(cFastestCSV, "parse_line", parse_line, 1);
|
100
|
+
*/
|
101
|
+
mCsvParser = rb_define_module("CsvParser");
|
102
|
+
rb_define_module_function(mCsvParser, "parse_line", parse_line, 1);
|
103
|
+
}
|
data/fastest-csv.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.expand_path('../lib/fastest-csv/version', __FILE__)
|
3
|
+
|
4
|
+
Gem::Specification.new do |gem|
  gem.name          = "fastest-csv"
  gem.version       = FastestCSV::VERSION
  gem.authors       = ["Maarten Oelering"]
  gem.email         = ["maarten@brightcode.nl"]
  gem.description   = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
  gem.summary       = %q{Fastest standard CSV parser for MRI Ruby and JRuby}
  gem.homepage      = "https://github.com/brightcode/fastest-csv"
  # Matches the LICENSE file shipped with the gem.
  gem.license       = "MIT"

  # `git ls-files` prints one path per line. Split on the newline explicitly:
  # $\ is nil by default, which makes split fall back to splitting on any
  # whitespace and would break paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  if RUBY_PLATFORM =~ /java/
    # JRuby build: ship the jar instead of compiling at install time
    gem.platform = "java"
    gem.files << "lib/csv_parser.jar"
  else
    # Other platforms: compile the C extension at install time
    gem.extensions = ['ext/csv_parser/extconf.rb']
  end

  gem.add_development_dependency "rake-compiler"
end
|
data/lib/fastest-csv.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'fastest_csv'
|
data/lib/fastest_csv.rb
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
# This loads either csv_parser.so, csv_parser.bundle or
|
2
|
+
# csv_parser.jar, depending on your Ruby platform and OS
|
3
|
+
require 'csv_parser'
|
4
|
+
require 'stringio'
|
5
|
+
|
6
|
+
# Fast CSV parser using native code
|
7
|
+
class FastestCSV

  if RUBY_PLATFORM =~ /java/
    # On JRuby, register the CsvParser module implemented by CsvParserService
    require 'jruby'
    org.brightcode.CsvParserService.new.basicLoad(JRuby.runtime)
  end

  # Pass each line of the specified +path+ as array to the provided +block+.
  # Returns an Enumerator over the rows when no block is given.
  def self.foreach(path, &block)
    return to_enum(:foreach, path) unless block

    open(path) do |reader|
      reader.each(&block)
    end
  end

  # Opens a csv file. Pass a FastestCSV instance to the provided block,
  # or return it when no block is provided. With a block the file is closed
  # when the block exits and the value of the block is returned.
  def self.open(path, mode = "rb")
    csv = new(File.open(path, mode))
    if block_given?
      begin
        yield csv
      ensure
        csv.close
      end
    else
      csv
    end
  end

  # Read all lines from the specified +path+ into an array of arrays
  def self.read(path)
    open(path, "rb") { |csv| csv.read }
  end

  # Alias for FastestCSV.read
  def self.readlines(path)
    read(path)
  end

  # Read all lines from the specified String into an array of arrays.
  # When a block is given, each row is passed to the block instead.
  def self.parse(data, &block)
    csv = new(StringIO.new(data))
    begin
      if block.nil?
        csv.read
      else
        csv.each(&block)
      end
    ensure
      # close the StringIO in both forms, not only when no block is given
      csv.close
    end
  end

  # Parse a single line into an array of fields; returns nil for an empty line
  def self.parse_line(line)
    ::CsvParser.parse_line(line)
  end

  # Create new FastestCSV wrapping the specified IO object
  def initialize(io)
    @io = io
  end

  # Read from the wrapped IO passing each line as array to the specified block.
  # Returns an Enumerator when no block is given.
  def each
    return to_enum(:each) unless block_given?

    while row = shift
      yield row
    end
  end

  # Read all remaining lines from the wrapped IO into an array of arrays
  def read
    table = []
    each { |row| table << row }
    table
  end
  alias_method :readlines, :read

  # Read next line from the wrapped IO and return as array or nil at EOF.
  #
  # A quoted field may contain line breaks, so one row can span several
  # physical lines. Escaped quotes come in pairs, which means a row is
  # complete exactly when it contains an even number of quote characters;
  # while the count is odd the next physical line is appended before parsing.
  def shift
    line = @io.gets
    return nil unless line

    inside_quotes = line.count('"').odd?
    while inside_quotes
      continuation = @io.gets
      break unless continuation # EOF inside an unterminated quoted field

      line << continuation
      # only count the new part; odd flips the state, even keeps it
      inside_quotes = !inside_quotes if continuation.count('"').odd?
    end

    ::CsvParser.parse_line(line)
  end
  alias_method :gets, :shift
  alias_method :readline, :shift

  # Close the wrapped IO
  def close
    @io.close
  end

  # Whether the wrapped IO has been closed
  def closed?
    @io.closed?
  end
end
|
104
|
+
|
105
|
+
class String
|
106
|
+
# Parses this string as one CSV line; equivalent to <tt>FastestCSV.parse_line(self)</tt>
|
107
|
+
def parse_csv
|
108
|
+
::CsvParser.parse_line(self)
|
109
|
+
end
|
110
|
+
end
|
111
|
+
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#
|
2
|
+
# Tests copied from faster_csv by James Edward Gray II
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'fastest_csv'
|
7
|
+
|
8
|
+
#
|
9
|
+
# Following tests are my interpretation of the
|
10
|
+
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I only deviate from that
|
11
|
+
# document in one place (intentionally) and that is to make the default row
|
12
|
+
# separator <tt>$/</tt>.
|
13
|
+
#
|
14
|
+
class TestCSVParsing < Test::Unit::TestCase

  # Classic example line mixing plain, empty, quoted and quote-escaped fields.
  def test_mastering_regex_example
    line     = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
    expected = [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                 "It's \"10 Grand\", baby", "10K" ]
    assert_equal(expected, CsvParser.parse_line(line))
  end

  # Pulled from: http://www.ruby-lang.org/cgi-bin/cvsweb.cgi/ruby/test/csv/test_csv.rb?rev=1.12.2.2;content-type=text%2Fplain
  def test_std_lib_csv
    assert_all_parse(
      [ ["\t",                      ["\t"]],
        ["foo,\"\"\"\"\"\",baz",    ["foo", "\"\"", "baz"]],
        ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
        ["\"\"\"\n\",\"\"\"\n\"",   ["\"\n", "\"\n"]],
        ["foo,\"\r\n\",baz",        ["foo", "\r\n", "baz"]],
        ["\"\"",                    [""]],
        ["foo,\"\"\"\",baz",        ["foo", "\"", "baz"]],
        ["foo,\"\r.\n\",baz",       ["foo", "\r.\n", "baz"]],
        ["foo,\"\r\",baz",          ["foo", "\r", "baz"]],
        ["foo,\"\",baz",            ["foo", "", "baz"]],
        ["\",\"",                   [","]],
        ["foo",                     ["foo"]],
        [",,",                      [nil, nil, nil]],
        [",",                       [nil, nil]],
        ["foo,\"\n\",baz",          ["foo", "\n", "baz"]],
        ["foo,,baz",                ["foo", nil, "baz"]],
        ["\"\"\"\r\",\"\"\"\r\"",   ["\"\r", "\"\r"]],
        ["\",\",\",\"",             [",", ","]],
        ["foo,bar,",                ["foo", "bar", nil]],
        [",foo,bar",                [nil, "foo", "bar"]],
        ["foo,bar",                 ["foo", "bar"]],
        [";",                       [";"]],
        ["\t,\t",                   ["\t", "\t"]],
        ["foo,\"\r\n\r\",baz",      ["foo", "\r\n\r", "baz"]],
        ["foo,\"\r\n\n\",baz",      ["foo", "\r\n\n", "baz"]],
        ["foo,\"foo,bar\",baz",     ["foo", "foo,bar", "baz"]],
        [";,;",                     [";", ";"]] ]
    )

    assert_all_parse(
      [ ["foo,\"\"\"\"\"\",baz",    ["foo", "\"\"", "baz"]],
        ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
        ["foo,\"\r\n\",baz",        ["foo", "\r\n", "baz"]],
        ["\"\"",                    [""]],
        ["foo,\"\"\"\",baz",        ["foo", "\"", "baz"]],
        ["foo,\"\r.\n\",baz",       ["foo", "\r.\n", "baz"]],
        ["foo,\"\r\",baz",          ["foo", "\r", "baz"]],
        ["foo,\"\",baz",            ["foo", "", "baz"]],
        ["foo",                     ["foo"]],
        [",,",                      [nil, nil, nil]],
        [",",                       [nil, nil]],
        ["foo,\"\n\",baz",          ["foo", "\n", "baz"]],
        ["foo,,baz",                ["foo", nil, "baz"]],
        ["foo,bar",                 ["foo", "bar"]],
        ["foo,\"\r\n\n\",baz",      ["foo", "\r\n\n", "baz"]],
        ["foo,\"foo,bar\",baz",     ["foo", "foo,bar", "baz"]] ]
    )
  end

  # From: http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
  def test_aras_edge_cases
    assert_all_parse(
      [ [%Q{a,b},                 ["a", "b"]],
        [%Q{a,"""b"""},           ["a", "\"b\""]],
        [%Q{a,"""b"},             ["a", "\"b"]],
        [%Q{a,"b"""},             ["a", "b\""]],
        [%Q{a,"\nb"""},           ["a", "\nb\""]],
        [%Q{a,"""\nb"},           ["a", "\"\nb"]],
        [%Q{a,"""\nb\n"""},       ["a", "\"\nb\n\""]],
        [%Q{a,"""\nb\n""",\nc},   ["a", "\"\nb\n\"", nil]],
        [%Q{a,,,},                ["a", nil, nil, nil]],
        [%Q{,},                   [nil, nil]],
        [%Q{"",""},               ["", ""]],
        [%Q{""""},                ["\""]],
        [%Q{"""",""},             ["\"",""]],
        [%Q{,""},                 [nil,""]],
        [%Q{,"\r"},               [nil,"\r"]],
        [%Q{"\r\n,"},             ["\r\n,"]],
        [%Q{"\r\n,",},            ["\r\n,", nil]] ]
    )
  end

  def test_james_edge_cases
    # An empty string stands for a read at EOF and must give nil.
    assert_equal(nil, CsvParser.parse_line(""))
    #
    # An empty line cannot be told apart from a line holding a single +nil+
    # field. Like the standard CSV library, this parser answers
    # <tt>[nil]</tt>; only the first line of the input is parsed.
    #
    assert_equal([nil], CsvParser.parse_line("\n1,2,3\n"))
  end

  def test_rob_edge_cases
    assert_all_parse(
      [ [%Q{"a\nb"},                         ["a\nb"]],
        [%Q{"\n\n\n"},                       ["\n\n\n"]],
        [%Q{a,"b\n\nc"},                     ['a', "b\n\nc"]],
        [%Q{,"\r\n"},                        [nil,"\r\n"]],
        [%Q{,"\r\n."},                       [nil,"\r\n."]],
        [%Q{"a\na","one newline"},           ["a\na", 'one newline']],
        [%Q{"a\n\na","two newlines"},        ["a\n\na", 'two newlines']],
        [%Q{"a\r\na","one CRLF"},            ["a\r\na", 'one CRLF']],
        [%Q{"a\r\n\r\na","two CRLFs"},       ["a\r\n\r\na", 'two CRLFs']],
        [%Q{with blank,"start\n\nfinish"\n}, ['with blank', "start\n\nfinish"]] ]
    )
  end

  private

  # Asserts, pair by pair and in order, that parsing the first element of
  # each [line, fields] pair yields the second.
  def assert_all_parse(pairs)
    pairs.each do |line, fields|
      assert_equal(fields, CsvParser.parse_line(line))
    end
  end

end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#
|
2
|
+
# Tests copied from faster_csv by James Edward Gray II
|
3
|
+
#
|
4
|
+
|
5
|
+
require 'test/unit'
|
6
|
+
require 'fastest_csv'
|
7
|
+
|
8
|
+
# Exercises the file/IO level interface of FastestCSV against a small
# temporary CSV file. Parsing options (such as a custom column separator)
# are not covered here.
class TestFastestCSVInterface < Test::Unit::TestCase

  # Writes a two-row CRLF-terminated fixture next to this file
  def setup
    @path = File.join(File.dirname(__FILE__), "temp_test_data.csv")

    File.open(@path, "w") do |file|
      file << "1,2,3\r\n"
      file << "4,5\r\n"
    end

    @expected = [%w{1 2 3}, %w{4 5}]
  end

  def teardown
    File.unlink(@path)
  end

  ### Test Read Interface ###

  def test_foreach
    # Collect first, compare afterwards: asserting inside the block would
    # pass even if no row was ever yielded.
    rows = []
    FastestCSV.foreach(@path) { |row| rows << row }
    assert_equal(@expected, rows)
  end

  def test_open_and_close
    csv = FastestCSV.open(@path, "r+")
    assert_not_nil(csv)
    assert_instance_of(FastestCSV, csv)
    assert_equal(false, csv.closed?)
    csv.close
    assert(csv.closed?)

    # Use a distinct block parameter and keep a reference to it; a parameter
    # named csv would shadow the outer variable, so the check after the block
    # would only look at the instance that was closed above.
    yielded = nil
    ret = FastestCSV.open(@path) do |block_csv|
      assert_instance_of(FastestCSV, block_csv)
      assert_equal(false, block_csv.closed?)
      yielded = block_csv
      "Return value."
    end
    assert_not_nil(yielded)
    assert(yielded.closed?)
    assert_equal("Return value.", ret)
  end

  def test_parse
    data = File.read(@path)
    assert_equal(@expected, FastestCSV.parse(data))

    rows = []
    FastestCSV.parse(data) { |row| rows << row }
    assert_equal(@expected, rows)
  end

  def test_parse_line_with_empty_lines
    assert_nil(FastestCSV.parse_line(""))  # to signal eof
    # only the first (empty) line is parsed, giving a single nil field
    assert_equal([nil], FastestCSV.parse_line("\n1,2,3"))
  end

  def test_read_and_readlines
    assert_equal(@expected, FastestCSV.read(@path))
    assert_equal(@expected, FastestCSV.readlines(@path))

    data = FastestCSV.open(@path) do |csv|
      csv.read
    end
    assert_equal(@expected, data)

    data = FastestCSV.open(@path) do |csv|
      csv.readlines
    end
    assert_equal(@expected, data)
  end

  def test_shift  # aliased as gets() and readline()
    FastestCSV.open(@path, "r+") do |csv|
      assert_equal(@expected.shift, csv.shift)
      assert_equal(@expected.shift, csv.shift)
      assert_nil(csv.shift)
    end
  end

  def test_long_line  # ruby's regex parser may have problems with long rows
    File.unlink(@path)

    long_field_length = 2800
    File.open(@path, "w") do |file|
      file << "1,2,#{'3' * long_field_length}\r\n"
    end
    @expected = [%w{1 2} + ['3' * long_field_length]]
    # test_shift shifts twice: the second shift takes nil from @expected and
    # expects nil from the one-row file as well
    test_shift
  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fastest-csv
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.2
|
6
|
+
platform: java
|
7
|
+
authors:
|
8
|
+
- Maarten Oelering
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2013-01-13 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rake-compiler
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :development
|
25
|
+
version_requirements: *id001
|
26
|
+
description: Fastest standard CSV parser for MRI Ruby and JRuby
|
27
|
+
email:
|
28
|
+
- maarten@brightcode.nl
|
29
|
+
executables: []
|
30
|
+
|
31
|
+
extensions: []
|
32
|
+
|
33
|
+
extra_rdoc_files: []
|
34
|
+
|
35
|
+
files:
|
36
|
+
- .gitignore
|
37
|
+
- Gemfile
|
38
|
+
- LICENSE
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- ext/csv_parser/CsvParserService.java
|
42
|
+
- ext/csv_parser/extconf.rb
|
43
|
+
- ext/csv_parser/parser.c
|
44
|
+
- fastest-csv.gemspec
|
45
|
+
- lib/fastest-csv.rb
|
46
|
+
- lib/fastest-csv/version.rb
|
47
|
+
- lib/fastest_csv.rb
|
48
|
+
- test/tc_csv_parsing.rb
|
49
|
+
- test/tc_interface.rb
|
50
|
+
- lib/csv_parser.jar
|
51
|
+
homepage: https://github.com/brightcode/fastest-csv
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options: []
|
56
|
+
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
none: false
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: "0"
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
none: false
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: "0"
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 1.8.24
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: Fastest standard CSV parser for MRI Ruby and JRuby
|
78
|
+
test_files:
|
79
|
+
- test/tc_csv_parsing.rb
|
80
|
+
- test/tc_interface.rb
|